From 7e3f9d0766de0cec4cddd6aa32cfb33e4a990d1a Mon Sep 17 00:00:00 2001 From: roll Date: Tue, 18 Jul 2023 13:51:35 +0000 Subject: [PATCH] Apply automatic changes --- .gitignore | 1 + 404.html | 3439 +++++++++++ .../2022/08-22-frictionless-framework-v5.html | 4033 +++++++++++++ blog/2022/09-07-github-integration.html | 3529 +++++++++++ blog/2022/11-07-zenodo-integration.html | 3534 +++++++++++ blog/index.html | 3518 +++++++++++ data/capital.package.yaml | 1 - data/country.package.json | 1 - data/country.package.yaml | 1 - data/table-output.parq | Bin 882 -> 882 bytes docs/advanced/design.html | 3467 +++++++++++ docs/advanced/extending.html | 3631 ++++++++++++ docs/advanced/system.html | 4141 +++++++++++++ docs/basic-examples.html | 4141 +++++++++++++ docs/checks/baseline.html | 3570 ++++++++++++ docs/checks/cell.html | 4024 +++++++++++++ docs/checks/row.html | 3632 ++++++++++++ docs/checks/table.html | 3691 ++++++++++++ docs/codebase/authors.html | 3507 +++++++++++ docs/codebase/changelog.html | 4170 +++++++++++++ docs/codebase/contributing.html | 3621 ++++++++++++ docs/codebase/license.html | 3479 +++++++++++ docs/codebase/migration.html | 3506 +++++++++++ docs/console/convert.html | 3518 +++++++++++ docs/console/describe.html | 3549 +++++++++++ docs/console/explore.html | 4544 +++++++++++++++ docs/console/extract.html | 3528 +++++++++++ docs/console/index.html | 3589 ++++++++++++ docs/console/list.html | 3499 +++++++++++ docs/console/overview.html | 3550 +++++++++++ docs/console/publish.html | 3474 +++++++++++ docs/console/query.html | 3489 +++++++++++ docs/console/script.html | 3489 +++++++++++ docs/console/validate.html | 3512 +++++++++++ docs/errors/cell.html | 3856 ++++++++++++ docs/errors/data.html | 3535 +++++++++++ docs/errors/file.html | 3597 ++++++++++++ docs/errors/header.html | 3589 ++++++++++++ docs/errors/label.html | 3722 ++++++++++++ docs/errors/metadata.html | 3960 +++++++++++++ docs/errors/resource.html | 3660 ++++++++++++ docs/errors/row.html | 3819 ++++++++++++ docs/errors/table.html | 3713 ++++++++++++ docs/fields/any.html | 3553 +++++++++++ docs/fields/array.html | 3565 +++++++++++ docs/fields/boolean.html | 3578 ++++++++++++ docs/fields/date.html | 3553 +++++++++++ docs/fields/datetime.html | 3553 +++++++++++ docs/fields/duration.html | 3554 +++++++++++ docs/fields/geojson.html | 3552 +++++++++++ docs/fields/geopoint.html | 3552 +++++++++++ docs/fields/integer.html | 3565 +++++++++++ docs/fields/number.html | 3600 ++++++++++++ docs/fields/object.html | 3553 +++++++++++ docs/fields/string.html | 3561 +++++++++++ docs/fields/time.html | 3553 +++++++++++ docs/fields/year.html | 3553 +++++++++++ docs/fields/yearmonth.html | 3553 +++++++++++ docs/formats/csv.html | 3680 ++++++++++++ docs/formats/erd.html | 3472 +++++++++++ docs/formats/excel.html | 3660 ++++++++++++ docs/formats/gsheets.html | 3601 ++++++++++++ docs/formats/html.html | 3619 ++++++++++++ docs/formats/inline.html | 3619 ++++++++++++ docs/formats/json.html | 3646 ++++++++++++ docs/formats/jsonschema.html | 3471 +++++++++++ docs/formats/markdown.html | 3472 +++++++++++ docs/formats/ods.html | 3600 ++++++++++++ docs/formats/pandas.html | 3515 +++++++++++ docs/formats/parquet.html | 3620 ++++++++++++ docs/formats/spss.html | 3518 +++++++++++ docs/formats/sql.html | 3718 ++++++++++++ docs/formats/yaml.html | 3630 ++++++++++++ docs/formats/zip.html | 3472 +++++++++++ docs/framework/actions.html | 3795 ++++++++++++ docs/framework/catalog.html | 3888 ++++++++++++ docs/framework/checklist.html | 3814 ++++++++++++ 
docs/framework/detector.html | 4045 +++++++++++++ docs/framework/dialect.html | 3990 +++++++++++++ docs/framework/error.html | 3583 ++++++++++++ docs/framework/inquiry.html | 3850 ++++++++++++ docs/framework/package.html | 4234 ++++++++++++++ docs/framework/pipeline.html | 3783 ++++++++++++ docs/framework/report.html | 4027 +++++++++++++ docs/framework/resource.html | 4701 +++++++++++++++ docs/framework/schema.html | 4256 ++++++++++++++ docs/framework/table.html | 3749 ++++++++++++ docs/getting-started.html | 3783 ++++++++++++ docs/guides/describing-data.html | 4792 +++++++++++++++ docs/guides/extracting-data.html | 4120 +++++++++++++ docs/guides/transforming-data.html | 3751 ++++++++++++ docs/guides/validating-data.html | 4798 +++++++++++++++ docs/portals/ckan.html | 3916 +++++++++++++ docs/portals/github.html | 4058 +++++++++++++ docs/portals/zenodo.html | 4201 +++++++++++++ docs/resources/file.html | 3492 +++++++++++ docs/resources/json.html | 3512 +++++++++++ docs/resources/table.html | 3516 +++++++++++ docs/resources/text.html | 3514 +++++++++++ docs/schemes/aws.html | 3594 ++++++++++++ docs/schemes/buffer.html | 3514 +++++++++++ docs/schemes/local.html | 3520 +++++++++++ docs/schemes/multipart.html | 3584 ++++++++++++ docs/schemes/remote.html | 3608 ++++++++++++ docs/schemes/stream.html | 3524 +++++++++++ docs/steps/cell.html | 4217 +++++++++++++ docs/steps/field.html | 4622 +++++++++++++++ docs/steps/resource.html | 3891 ++++++++++++ docs/steps/row.html | 4326 ++++++++++++++ docs/steps/table.html | 5191 +++++++++++++++++ docs/universe.html | 3472 +++++++++++ index.html | 3481 +++++++++++ 112 files changed, 400249 insertions(+), 3 deletions(-) create mode 100644 404.html create mode 100644 blog/2022/08-22-frictionless-framework-v5.html create mode 100644 blog/2022/09-07-github-integration.html create mode 100644 blog/2022/11-07-zenodo-integration.html create mode 100644 blog/index.html create mode 100644 docs/advanced/design.html create mode 100644 docs/advanced/extending.html create mode 100644 docs/advanced/system.html create mode 100644 docs/basic-examples.html create mode 100644 docs/checks/baseline.html create mode 100644 docs/checks/cell.html create mode 100644 docs/checks/row.html create mode 100644 docs/checks/table.html create mode 100644 docs/codebase/authors.html create mode 100644 docs/codebase/changelog.html create mode 100644 docs/codebase/contributing.html create mode 100644 docs/codebase/license.html create mode 100644 docs/codebase/migration.html create mode 100644 docs/console/convert.html create mode 100644 docs/console/describe.html create mode 100644 docs/console/explore.html create mode 100644 docs/console/extract.html create mode 100644 docs/console/index.html create mode 100644 docs/console/list.html create mode 100644 docs/console/overview.html create mode 100644 docs/console/publish.html create mode 100644 docs/console/query.html create mode 100644 docs/console/script.html create mode 100644 docs/console/validate.html create mode 100644 docs/errors/cell.html create mode 100644 docs/errors/data.html create mode 100644 docs/errors/file.html create mode 100644 docs/errors/header.html create mode 100644 docs/errors/label.html create mode 100644 docs/errors/metadata.html create mode 100644 docs/errors/resource.html create mode 100644 docs/errors/row.html create mode 100644 docs/errors/table.html create mode 100644 docs/fields/any.html create mode 100644 docs/fields/array.html create mode 100644 docs/fields/boolean.html create mode 100644 docs/fields/date.html 
create mode 100644 docs/fields/datetime.html create mode 100644 docs/fields/duration.html create mode 100644 docs/fields/geojson.html create mode 100644 docs/fields/geopoint.html create mode 100644 docs/fields/integer.html create mode 100644 docs/fields/number.html create mode 100644 docs/fields/object.html create mode 100644 docs/fields/string.html create mode 100644 docs/fields/time.html create mode 100644 docs/fields/year.html create mode 100644 docs/fields/yearmonth.html create mode 100644 docs/formats/csv.html create mode 100644 docs/formats/erd.html create mode 100644 docs/formats/excel.html create mode 100644 docs/formats/gsheets.html create mode 100644 docs/formats/html.html create mode 100644 docs/formats/inline.html create mode 100644 docs/formats/json.html create mode 100644 docs/formats/jsonschema.html create mode 100644 docs/formats/markdown.html create mode 100644 docs/formats/ods.html create mode 100644 docs/formats/pandas.html create mode 100644 docs/formats/parquet.html create mode 100644 docs/formats/spss.html create mode 100644 docs/formats/sql.html create mode 100644 docs/formats/yaml.html create mode 100644 docs/formats/zip.html create mode 100644 docs/framework/actions.html create mode 100644 docs/framework/catalog.html create mode 100644 docs/framework/checklist.html create mode 100644 docs/framework/detector.html create mode 100644 docs/framework/dialect.html create mode 100644 docs/framework/error.html create mode 100644 docs/framework/inquiry.html create mode 100644 docs/framework/package.html create mode 100644 docs/framework/pipeline.html create mode 100644 docs/framework/report.html create mode 100644 docs/framework/resource.html create mode 100644 docs/framework/schema.html create mode 100644 docs/framework/table.html create mode 100644 docs/getting-started.html create mode 100644 docs/guides/describing-data.html create mode 100644 docs/guides/extracting-data.html create mode 100644 docs/guides/transforming-data.html create mode 100644 docs/guides/validating-data.html create mode 100644 docs/portals/ckan.html create mode 100644 docs/portals/github.html create mode 100644 docs/portals/zenodo.html create mode 100644 docs/resources/file.html create mode 100644 docs/resources/json.html create mode 100644 docs/resources/table.html create mode 100644 docs/resources/text.html create mode 100644 docs/schemes/aws.html create mode 100644 docs/schemes/buffer.html create mode 100644 docs/schemes/local.html create mode 100644 docs/schemes/multipart.html create mode 100644 docs/schemes/remote.html create mode 100644 docs/schemes/stream.html create mode 100644 docs/steps/cell.html create mode 100644 docs/steps/field.html create mode 100644 docs/steps/resource.html create mode 100644 docs/steps/row.html create mode 100644 docs/steps/table.html create mode 100644 docs/universe.html create mode 100644 index.html diff --git a/.gitignore b/.gitignore index 088d28f607..c24a2d10aa 100644 --- a/.gitignore +++ b/.gitignore @@ -95,3 +95,4 @@ coverage/ site/ tmp/ .vim +!**/*.html diff --git a/404.html b/404.html new file mode 100644 index 0000000000..7248050755 --- /dev/null +++ b/404.html @@ -0,0 +1,3439 @@ + + + + + + + + +Not Found | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Not Found

+
+ +

Return to the home page.

+
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/blog/2022/08-22-frictionless-framework-v5.html b/blog/2022/08-22-frictionless-framework-v5.html new file mode 100644 index 0000000000..5d156019af --- /dev/null +++ b/blog/2022/08-22-frictionless-framework-v5.html @@ -0,0 +1,4033 @@ + + + + + + + + +Welcome Frictionless Framework (v5) | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Welcome Frictionless Framework (v5)

+

+

+ By Evgeny Karev on 2022-08-22 » + Blog Index +

We're releasing a first beta of Frictionless Framework (v5)!

+

Since the initial Frictionless Framework release we'd been collecting feedback and analyzing both high-level users' needs and bug reports to identify shortcomings and areas that can be improved in the next version of the framework. Once that process was done, we started working on v5 with the goal of making the framework more bullet-proof, easier to maintain, and simpler to use. Today, this version is almost stable and ready to be published. Let's go through the main improvements we have made:

+

Improved Metadata

+

This year we started working on the Frictionless Application; at the same time, we were thinking about next steps for the Frictionless Standards. Both need a well-defined and easy-to-understand metadata model. Parts of it are already published as standards like Table Schema, and parts are going to be published as standards like File Dialect and possibly validation/transform metadata.

+

Dialect

+

In v4 of the framework we had the Control/Dialect/Layout concepts to describe resource details related to different formats and schemes, as well as tabular details like header rows. In v5 these are merged into a single concept called Dialect, which is going to be standardised as a File Dialect spec. Here is an example:

+ +
+
+
header: true
+headerRows: [2, 3]
+commentChar: '#'
+csv:
+  delimiter: ';'
+
+ +
+
+
from frictionless import Dialect, Control, formats
+
+dialect = Dialect(header=True, header_rows=[2, 3], comment_char='#')
+dialect.add_control(formats.CsvControl(delimiter=';'))
+print(dialect)
+
+ +
+

A dialect descriptor can be saved and reused within a resource. Technically, it's possible to provide settings for different schemes and formats within one Dialect (e.g. for CSV and Excel), so one re-usable dialect can be created for e.g. a whole data package. The legacy CSV Dialect spec is supported and will be supported forever, so CSV properties can be provided on the root level:

+ +
+
+
header: true
+delimiter: ';'
+
+ +
+
+
from frictionless import Dialect, Control, formats
+
+dialect = Dialect.from_descriptor({"header": True, "delimiter": ';'})
+print(dialect)
+
+ +
+

For performance and codebase maintainability reasons, some marginal Layout features such as skip/pick/limit/offsetFields have been removed completely. The same results can be achieved using the Pipeline concept as part of a transformation workflow.
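For example, here is a minimal sketch of reproducing the old pick/limit behaviour with transform steps (the step names field_filter and row_slice below are assumptions based on the v5 steps module, not part of the original post):

+from frictionless import Pipeline, steps, transform
+
+# keep only two fields and the first three rows (was pickFields/limitRows in v4)
+pipeline = Pipeline(steps=[
+    steps.field_filter(names=['id', 'name']),
+    steps.row_slice(head=3),
+])
+resource = transform('table.csv', pipeline=pipeline)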

+

Read an article about Dialect Class for more information.

+

Checklist

+

Checklist is a new concept introduced in v5. It's basically a collection of validation checks and a few other settings that makes "validation rules" shareable. For example:

+ +
+
+
checks:
+  - type: ascii-value
+  - type: row-constraint
+    formula: id > 1
+skipErrors:
+  - duplicate-label
+
+ +
+
+
from frictionless import Checklist, checks
+
+checklist = Checklist(
+    checks=[checks.ascii_value(), checks.row_constraint(formula='id > 1')],
+    skip_errors=['duplicate-label'],
+)
+print(checklist)
+
+ +
+

By sharing such a checklist, it's possible to tune data quality requirements for a data file or a set of data files. This concept makes it possible to create data quality "libraries" within projects or domains. We can use a checklist for validation:

+ +
+
+
frictionless validate table1.csv --checklist checklist.yaml
+frictionless validate table2.csv --checklist checklist.yaml
+
+ +
+

Here is a list of other changes:

+ + + + + + + + + + + + + + + + +
From (v4) → To (v5)
Check(descriptor) → Check.from_descriptor(descriptor)
check.code → check.type

Read an article about Checklist Class for more information.

+

Pipeline

+

In v4, Pipeline was a complex concept similar to the validation Inquiry. We reworked it for v5 to be a lightweight set of transform steps that can be applied to a data resource or a data package. For example:

+ +
+
+
steps:
+  - type: table-normalize
+  - type: cell-set
+    fieldName: version
+    value: v5
+
+ +
+
+
from frictionless import Pipeline, steps
+
+pipeline = Pipeline(
+    steps=[steps.table_normalize(), steps.cell_set(field_name='version', value='v5')],
+)
+print(pipeline)
+
+ +
+

Similar to the Checklist concept, Pipeline is a reusable (data-abstract) object that can be saved to a descriptor and used in some complex data workflow:

+ +
+
+
frictionless transform table1.csv --pipeline pipeline.yaml
+frictionless transform table2.csv --pipeline pipeline.yaml
+
+ +
+

Here is a list of other changes:

+ + + + + + + + + + + + + + + + +
From (v4) → To (v5)
Step(descriptor) → Step.from_descriptor(descriptor)
step.code → step.type

Read an article about Pipeline Class for more information.

+

Resource

+
+ +

There are no changes in the Resource related to the standards, although by default the type property is now used instead of profile to mark a resource as a table. This can be changed using the --standards v1 flag; a Python equivalent is sketched below.

+
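The same can be switched in Python via the system context (mirroring the system.use_context example from the System docs):

+from frictionless import describe, system
+
+# output metadata using the v1 notation (profile instead of type)
+with system.use_context(standards='v1'):
+    resource = describe('table.csv')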

It's now possible to set Checklist and Pipeline as Resource properties, similar to Dialect and Schema:

+ +
+
+
path: table.csv
+# ...
+checklist:
+  checks:
+    - type: ascii-value
+    - type: row-constraint
+      formula: id > 1
+pipeline:
+  steps:
+    - type: table-normalize
+    - type: cell-set
+      fieldName: version
+      value: v5
+
+ +
+

Or using dereference:

+ +
+
+
path: table.csv
+# ...
+checklist: checklist.yaml
+pipeline: pipeline.yaml
+
+ +
+

In this case the validation/transformation will use them by default, making it possible to ship validation rules and transformation pipelines within resources and packages. This is an important development for data publishers who want to define what they consider valid for their datasets, as well as to share raw data along with cleaning pipeline steps:

+ +
+
+
frictionless validate resource.yaml  # will use the checklist above
+frictionless transform resource.yaml  # will use the pipeline above
+
+ +
+

There are minor changes in the stats property. Now it uses named keys to simplify hash distinction (md5/sha256 are calculated by default and, unlike in v4, this cannot be changed, for performance reasons):

+ +
+
+
from frictionless import describe
+
+resource = describe('table.csv', stats=True)
+print(resource.stats)
+
+ +
+

Here is a list of other changes:

+ + + + + + + + + + + + +
From (v4) → To (v5)
for row in resource: → for row in resource.row_stream

Read an article about Resource Class for more information.

+

Package

+

There are no changes in the Package related to the standards, although it's now possible to use resource dereference:

+ +
+
+
name: package
+resources:
+  - resource1.yaml
+  - resource2.yaml
+
+ +
+
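As a sketch, such a package loads as usual and the dereferenced files become regular resources (assuming the descriptor above is saved as package.yaml next to resource1.yaml and resource2.yaml):

+from frictionless import Package
+
+package = Package('package.yaml')
+print([resource.name for resource in package.resources])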

Read an article about Package Class for more information.

+

Catalog

+
+ +

Catalog is a new concept: a collection of data packages that can be written inline or using dereference:

+ +
+
+
name: catalog
+packages:
+  - package1.yaml
+  - package2.yaml
+
+ +
+

Read an article about Catalog Class for more information.

+

Detector

+

Detector is now a metadata class (it wasn't in v4), so it can be saved and shared like other metadata classes:

+ +
+
+
from frictionless import Detector
+
+detector = Detector(sample_size=1000)
+print(detector)
+
+ +
+

Read an article about Detector Class for more information.

+

Inquiry

+

There are a few changes in the Inquiry concept, which is known for its use in the Frictionless Repository project:

+ + + + + + + + + + + + + + + + + + + + +
From (v4) → To (v5)
inquiryTask.source → inquiryTask.path
inquiryTask.source → inquiryTask.resource
inquiryTask.source → inquiryTask.package

Read an article about Inquiry Class for more information.

+

Report

+

The Report concept has been significantly simplified by removing the resource property from reportTask. It has been replaced by the name/type/place/labels properties. Also, report.time is now report.stats.seconds. The report/reportTask.warnings: List[str] properties have been added to provide non-error information, like reached limits:

+ +
+
+
frictionless validate table.csv --yaml
+
+ +
+

Here is a list of changes:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
From (v4) → To (v5)
report.time → report.stats.seconds
reportTask.time → reportTask.stats.seconds
reportTask.resource.name → reportTask.name
reportTask.resource.profile → reportTask.type
reportTask.resource.path → reportTask.place
reportTask.resource.schema → reportTask.labels

Read an article about Report Class for more information.

+

Schema

+

Changes in the Schema class:

+ + + + + + + + + + + + +
From (v4) → To (v5)
Schema(descriptor) → Schema.from_descriptor(descriptor)
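A quick sketch of the new notation:

+from frictionless import Schema
+
+schema = Schema.from_descriptor({'fields': [{'name': 'id', 'type': 'integer'}]})
+print(schema.to_descriptor())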

Error

+

There are a few changes in the Error data structure:

+ + + + + + + + + + + + + + + + + + + + + + + + +
From (v4) → To (v5)
error.code → error.type
error.name → error.title
error.rowPosition → error.rowNumber
error.fieldPosition → error.fieldNumber
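As a sketch, the renamed properties show up when flattening a validation report:

+from frictionless import validate
+
+report = validate('table.csv')
+print(report.flatten(['rowNumber', 'fieldNumber', 'type']))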

Types

+

Note that all the metadata entities that have multiple implementations in v5 are based on a unified type model. It means that they use the type property to provide type information:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
From (v4) → To (v5)
resource.profile → resource.type
check.code → check.type
control.code → control.type
error.code → error.type
field.type → field.type
step.type → step.type

The new v5 version still supports old notation in descriptors for backward-compatibility.

+

Improved Model

+

Frictionless had been mixing declarative metadata and an object model for many years, for historical reasons. Since the first implementation of the datapackage library, we used different approaches to sync internal state in order to provide both interfaces: descriptor and object model. In Frictionless Framework v4 this technique was taken to a really sophisticated level, with special observable dictionary classes. It was quite smart and nice to use for quick prototyping in a REPL, but it was really hard to maintain and error-prone.

+

In Framework v5 we finally decided to follow the "right way" of handling this problem and split descriptors and the object model completely.

+

Descriptors

+

In the Frictionless World we deal with a lot of declarative metadata descriptors such as packages, schemas, pipelines, etc. Nothing changes in v5 regarding this. So for example here is a Table Schema:

+ +
+
+
fields:
+  - name: id
+    type: integer
+  - name: name
+    type: string
+
+ +
+

Object Model

+

The difference comes when we create a metadata instance based on this descriptor. In v4, all the metadata classes were subclasses of the dict class, providing a mix between a descriptor and an object model for state management. In v5 there is a clear boundary between descriptor and object model: all the state is managed, as it should be in a normal Python class, using class attributes:

+ +
+
+
from frictionless import Schema
+
+schema = Schema.from_descriptor('schema.yaml')
+# Here we deal with a proper object model
+descriptor = schema.to_descriptor()
+# Here we export it back to be a descriptor
+
+ +
+

There are a few important traits of the new model:

+ +

This separation might require adding a few extra lines of code, but it gives us much less fragile programs in the end. This is especially important for software integrators who want to be sure that they write working code. At the same time, for quick prototyping and discovery, Frictionless still provides high-level actions like the validate function, which are more forgiving regarding user input.
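For example, the high-level action still accepts a plain path and handles descriptor loading internally:

+from frictionless import validate
+
+# no explicit from_descriptor call is needed for quick checks
+report = validate('table.csv')
+print(report.valid)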

+

Static Typing

+

One of the most important consequences of "fixing" state management in Frictionless is our new ability to provide static typing for the framework codebase. This work is in progress, but we have already added a lot of types, and the codebase successfully passes pyright validation. We highly recommend enabling pyright in your IDE to see all the type problems in advance:

+
+ +

Livemark Docs

+

We're happy to announce that we're finally ready to drop the JavaScript dependency for docs generation, as we have migrated to Livemark. Moreover, Livemark's ability to execute scripts inside the documentation, along with other nifty features like simple Tabs or a reference generator, will save us hours and hours of writing better docs.

+

Script Execution

+
+ +

Reference Generation

+
+ +

Happy Contributors

+

We hope that the Livemark docs writing experience will make our contributors happier and help grow our community of Frictionless Authors and Users. Let's chat in our Slack if you have questions or just want to say hi.

+

Read Livemark Docs for more information.

+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/blog/2022/09-07-github-integration.html b/blog/2022/09-07-github-integration.html new file mode 100644 index 0000000000..49f41ca13a --- /dev/null +++ b/blog/2022/09-07-github-integration.html @@ -0,0 +1,3529 @@ + + + + + + + + +Github Integration | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Github Integration

+

+

+ By Shashi Gharti on 2022-09-07 » + Blog Index +

We are happy to announce the github plugin, which makes sharing data between frictionless and github easier without any extra work or configuration. All the github plugin functionality is built on top of the PyGithub library. The main idea is to make the interaction between the framework and github seamless, using read and write functions developed on top of the Frictionless python library. Here is a short introduction and examples of the features.

+

Reading from the repo

+

Reading a package from a github repository is easy! The existing Package class can identify a github url and read the packages and resources from the repo, with or without a package descriptor. If a package descriptor is not defined, it will create one with the resources that it finds in the repo.

+ +
+
+
from frictionless import Package
+
+package = Package("https://github.com/fdtester/test-repo-with-datapackage-json")
+print(package)
+
+ +
+

Writing/Publishing to the repo

+

Writing and publishing can be done by passing the repository link to the publish function.

+ +
+
+
from frictionless import Package, portals
+
+apikey = 'YOUR-GITHUB-API-KEY'
+package = Package('data/datapackage.json')
+response = package.publish("https://github.com/fdtester/test-repo-write",
+        control=portals.GithubControl(apikey=apikey)
+    )
+
+ +
+

Creating catalog

+

A catalog can be created from repositories by using 'search' queries. Repositories can be searched using a combination of any search text and github qualifiers. A simple example of creating a catalog from a search is as follows:

+ +
+
+
from frictionless import Catalog, portals
+
+catalog = Catalog(
+        control=portals.GithubControl(search="user:fdtester", per_page=1, page=1),
+    )
+
+ +
+
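Once created, the catalog's packages can be inspected as regular Package objects (a small sketch continuing the example above):

+for package in catalog.packages:
+    print(package.name)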

Happy Contributors

+

We will have more updates in the future and would love to hear from you about this new feature. Let's chat in our Slack if you have questions or just want to say hi.

+

Read Github Plugin Docs for more information.

+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/blog/2022/11-07-zenodo-integration.html b/blog/2022/11-07-zenodo-integration.html new file mode 100644 index 0000000000..be9b00c07a --- /dev/null +++ b/blog/2022/11-07-zenodo-integration.html @@ -0,0 +1,3534 @@ + + + + + + + + +Zenodo Integration | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Zenodo Integration

+

+

+ By Shashi Gharti on 2022-11-07 » + Blog Index +

Zenodo integration was a highly requested feature, and we are happy to share our first draft of the plugin, which makes sharing data between frictionless and zenodo easier without any extra work or configuration. This plugin uses the zenodopy library underneath to communicate with the Zenodo REST API. A frictionless user can use the framework functionality and then easily publish data to zenodo, and vice versa. Here is a short description of the features with examples.

+

Reading from the repo

+

You can simply read a package from a zenodo repository, or create a new one if a package descriptor does not exist. No additional configuration is required. The existing Package class identifies a zenodo url and reads the packages and resources from the repo. An example of reading a package from a zenodo repo is as follows:

+ +
+
+
from frictionless import Package
+
+package = Package("https://zenodo.org/record/7078760")
+print(package)
+
+ +
+

Once read, you can apply all the available functions to the package, such as validation, transformation, etc.
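For example, a quick validation sketch using the high-level validate action on the package we just read:

+from frictionless import validate
+
+report = validate(package)
+print(report.valid)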

+

Writing/Publishing to the repo

+

To write a package, we can simply use the publish function, which will write the package and resource files to a zenodo repository. We need to provide metadata for the repository while publishing, which we pass as a metadata file (via the metafn option) as shown in the example below:

+ +
+
+
from frictionless import Package, portals
+
+control = portals.ZenodoControl(
+       metafn="data/zenodo/metadata.json",
+       apikey=apikey
+)
+package = Package("data/datapackage.json")
+deposition_id = package.publish(control=control)
+print(deposition_id)
+
+
+ +
+

Once the package is published, the deposition_id is returned.

+

Creating catalog

+

A catalog can be created from a single repository or from multiple repositories. Repositories can be searched using any search terms, phrases, field searches, or a combination of all of these. A simple example of creating a catalog from a search is as follows:

+ +
+
+
from frictionless import Catalog, portals
+control=portals.ZenodoControl(search='title:"open science"')
+catalog = Catalog(
+        control=control,
+    )
+
+ +
+

Happy Contributors

+

We will have more updates in the future and would love to hear from you about this new feature. Let's chat in our Slack if you have questions or just want to say hi.

+

Read Zenodo Plugin Docs for more information.

+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/blog/index.html b/blog/index.html new file mode 100644 index 0000000000..b57471930a --- /dev/null +++ b/blog/index.html @@ -0,0 +1,3518 @@ + + + + + + + + +Blog | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Blog

+
+

Zenodo Integration

+
+
+

+ + By Shashi Gharti + on 2022-11-07 + +

+ This blog post introduces the zenodo plugin, which helps to easily read data from and write data to Zenodo. + Read more » +
+
+ +
+
+
+
+

Github Integration

+
+
+

+ + By Shashi Gharti + on 2022-09-07 + +

+ This blog post introduces the github plugin, which helps to seamlessly transfer/read data to/from Github. + Read more » +
+
+ +
+
+
+
+

Welcome Frictionless Framework (v5)

+
+
+

+ + By Evgeny Karev + on 2022-08-22 + +

+ Since the initial Frictionless Framework release we'd been collecting feedback and analyzing both high-level users' needs and bug reports to identify shortcomings and areas that can be improved in the next version of the framework. + Read more » +
+
+ +
+
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/data/capital.package.yaml b/data/capital.package.yaml index 1804f620ba..dfc3b86396 100644 --- a/data/capital.package.yaml +++ b/data/capital.package.yaml @@ -1,4 +1,3 @@ -$frictionless: package/v2 resources: - name: capital-invalid type: table diff --git a/data/country.package.json b/data/country.package.json index 38a22d8b52..913783f56b 100644 --- a/data/country.package.json +++ b/data/country.package.json @@ -1,5 +1,4 @@ { - "$frictionless": "package/v2", "resources": [ { "name": "capital-3", diff --git a/data/country.package.yaml b/data/country.package.yaml index 186238b966..1c299477e9 100644 --- a/data/country.package.yaml +++ b/data/country.package.yaml @@ -1,4 +1,3 @@ -$frictionless: package/v2 title: Countries and their capitals description: The data was collected as a research project resources: diff --git a/data/table-output.parq b/data/table-output.parq index b26d1e83abc4db6d4445d6ddaeb532a6cdcea9f0..ba6de54db6b5705ca965c26281f822bd3425c9f1 100644 GIT binary patch delta 44 zcmeyw_K9u7N+w40$*Y+1nT_-eCd)88h?(mdC}<>=X6B?Q7-%vuFf%X&I0hL404;+H AN&o-= delta 44 zcmeyw_K9u7N+w35$*Y+1nGN+!C(AH9h#Bb_C}<>=X6B?Q7-%vuFf%X&I0hL404&c7 AL;wH) diff --git a/docs/advanced/design.html b/docs/advanced/design.html new file mode 100644 index 0000000000..b36e3a7ad6 --- /dev/null +++ b/docs/advanced/design.html @@ -0,0 +1,3467 @@ + + + + + + + + +Design | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Design

+

This guide provides a high-level overview of the Frictionless Framework architecture. It will be useful for plugin authors and advanced users.

+

Reading Flow

+

Frictionless uses a modular approach in its architecture. During reading, a data source goes through various subsystems, which are selected depending on the data characteristics:

+

Reading

+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/advanced/extending.html b/docs/advanced/extending.html new file mode 100644 index 0000000000..7ad76e4dd3 --- /dev/null +++ b/docs/advanced/extending.html @@ -0,0 +1,3631 @@ + + + + + + + + +Extension | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Extension

+
+ +

Frictionless is built on top of a powerful plugin system, which is used internally and allows extending the framework.

+

Creating Plugin

+

To create a plugin you need:

+ +

Please consult System/Plugin for detailed information about the Plugin interface and how these methods can be implemented.

+

Plugin Example

+

Let's say we're interested in supporting the csv2k format that we have just invented. For simplicity, let's use a format that is exactly the same as CSV.

+

First of all, we need to create a frictionless_csv2k module containing a Plugin implementation and a Parser implementation, but we're going to re-use the CsvParser since our new format is the same:

+
+

frictionless_csv2k.py

+
+ +
+
+
from frictionless import Plugin, system
+from frictionless.plugins.csv import CsvParser
+
+class Csv2kPlugin(Plugin):
+    def create_parser(self, resource):
+        if resource.format == "csv2k":
+            return Csv2kParser(resource)
+
+class Csv2kParser(CsvParser):
+    pass
+
+system.register('csv2k', Csv2kPlugin())
+
+ +
+

Now, we can use our new format in any of the Frictionless functions that accept a table source, for example, extract or Table:

+ +
+
+
from frictionless import extract
+
+rows = extract('data/table.csv2k')
+print(rows)
+
+ +
+

This example is over-simplified to show the high-level mechanics, but writing Frictionless Plugins is designed to be easy. For inspiration, you can check the frictionless/plugins directory and learn from real-life examples. Also, in the Frictionless codebase there are many Check, Control, Dialect, Loader, Parser, and Server implementations; you can read their code for a better understanding of how to write your own subclass, or reach out to us for support.

+

Reference

+
+ + +
+
+ +

Plugin (class)

+ +
+
+ + +
+

Plugin (class)

+

Plugin representation + +It's an interface for writing Frictionless plugins. +You can implement one or more methods to hook into the Frictionless system.

+
+ + + +
+

plugin.create_adapter (method)

+

Create adapter

+

Signature

+

(source: Any, *, control: Optional[Control] = None, basepath: Optional[str] = None, packagify: bool = False) -> Optional[Adapter]

+

Parameters

+
    +
  • + source + (Any): source
  • +
  • + control + (Optional[Control]): control
  • +
  • + basepath + (Optional[str])
  • +
  • + packagify + (bool)
  • +
+
+
+

plugin.create_loader (method)

+

Create loader

+

Signature

+

(resource: Resource) -> Optional[Loader]

+

Parameters

+
    +
  • + resource + (Resource): loader resource
  • +
+
+
+

plugin.create_parser (method)

+

Create parser

+

Signature

+

(resource: Resource) -> Optional[Parser]

+

Parameters

+
    +
  • + resource + (Resource): parser resource
  • +
+
+
+

plugin.detect_field_candidates (method)

+

Detect field candidates

+

Signature

+

(candidates: List[dict[str, Any]]) -> None

+

Parameters

+
    +
  • + candidates + (List[dict[str, Any]])
  • +
+
+
+

plugin.detect_resource (method)

+

Hook into resource detection

+

Signature

+

(resource: Resource) -> None

+

Parameters

+
    +
  • + resource + (Resource): resource
  • +
+
+ + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/advanced/system.html b/docs/advanced/system.html new file mode 100644 index 0000000000..0aa28be3ea --- /dev/null +++ b/docs/advanced/system.html @@ -0,0 +1,4141 @@ + + + + + + + + +System | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

System

+
+ +

System Object

+

The most important underlying object in the Frictionless Framework is system. It's a singleton object available as frictionless.system.

+

System Context

+

Using the system object, a user can alter the execution context. It's implemented as a Python context manager, so it can be used in any way that's possible in Python; for example, contexts can be nested or combined.

+

trusted

+

If data or metadata comes from a trusted origin, it's possible to disable safety checks for paths:

+
from frictionless import extract, system
+
+with system.use_context(trusted=True):
+    extract('/path/to/file/is/absolute.csv')
+
+

onerror

+

To raise warnings or errors on data problems, it's possible to use the onerror context value. It defaults to ignore and can be set to warn or error:

+
from frictionless import extract, system
+
+with system.use_context(onerror='error'):
+    extract('table-with-error-will-raise-an-exception.csv')
+
+

standards

+

By default, the framework uses the upcoming v2 version of the standards for outputting metadata. It's possible to alter this behaviour:

+
from frictionless import describe, system
+
+with system.use_context(standards='v1'):
+    describe('metadata-will-be-in-v1.csv')
+
+

http_session

+

It's possible to provide a custom requests.Session:

+
import requests
+
+from frictionless import Resource, system
+
+session = requests.Session()
+with system.use_context(http_session=session):
+    with Resource(BASEURL % "data/table.csv") as resource:
+        assert resource.header == ["id", "name"]
+
+

System methods

+

This object can be used to instantiate different kinds of lower-level objects such as Check, Step, or Field. Here is a quick example of using the system object:

+ +
+
+
from frictionless import Resource, system
+
+# Create
+
+adapter = system.create_adapter(source, control=control)
+loader = system.create_loader(resource)
+parser = system.create_parser(resource)
+
+# Detect
+
+system.detect_resource(resource)
+field_candidates = system.detect_field_candidates()
+
+# Select
+
+Check = system.selectCheck('type')
+Control = system.selectControl('type')
+Error = system.selectError('type')
+Field = system.selectField('type')
+Step = system.selectStep('type')
+
+ +
+

As an extension author, you might use the system object in various cases. For example, take a look at this MultipartLoader excerpt:

+ +
+
+
def read_line_stream(self):
+    for number, path in enumerate(self.__path, start=1):
+        resource = Resource(path=path)
+        resource.infer(sample=False)
+        with system.create_loader(resource) as loader:
+            for line_number, line in enumerate(loader.byte_stream, start=1):
+                if not self.__headless and number > 1 and line_number == 1:
+                    continue
+                yield line
+
+ +
+

It's important to understand that creating low-level objects through the system object is generally more correct than instantiating classes directly, because it includes all the available plugins in the process, as sketched below.
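A short sketch of the difference (CsvParser stands in for any concrete class that a plugin might otherwise provide):

+from frictionless import Resource, system
+
+resource = Resource(path='data/table.csv')
+parser = system.create_parser(resource)  # plugin-aware: any registered plugin can supply the parser
+# instantiating a concrete class like CsvParser directly would bypass the plugin system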

+

Plugin API

+

The Plugin API almost fully follows the system object's API. So, as a plugin author, you need to hook into the same methods. For example, let's take a look at the builtin Csv Plugin:

+ +
+
+
class CsvPlugin(Plugin):
+    """Plugin for CSV"""
+
+    # Hooks
+
+    def create_parser(self, resource: Resource):
+        if resource.format in ["csv", "tsv"]:
+            return CsvParser(resource)
+
+    def detect_resource(self, resource: Resource):
+        if resource.format in ["csv", "tsv"]:
+            resource.type = "table"
+            resource.mediatype = f"text/{resource.format}"
+
+    def select_Control(self, type: str):
+        if type == "csv":
+            return CsvControl
+
+ +
+

Reference

+
+ + +
+
+ +

Adapter (class)

+

Loader (class)

+

Mapper (class)

+

Parser (class)

+

Plugin (class)

+

System (class)

+ +
+
+ + +
+

Adapter (class)

+

+
+ + + + + +
+

Loader (class)

+

Loader representation

+

Signature

+

(resource: Resource)

+

Parameters

+
    +
  • + resource + (Resource): resource
  • +
+
+ +
+

loader.remote (property)

+

+ Specifies if the resource is remote. +

+

Signature

+

bool

+
+ +
+

loader.buffer (property)

+

+

Signature

+

types.IBuffer

+
+
+

loader.byte_stream (property)

+

Resource byte stream + +The stream is available after opening the loader

+

Signature

+

types.IByteStream

+
+
+

loader.closed (property)

+

Whether the loader is closed

+

Signature

+

bool

+
+
+

loader.resource (property)

+

+

Signature

+

Resource

+
+
+

loader.text_stream (property)

+

Resource text stream + +The stream is available after opening the loader

+

Signature

+

types.ITextStream

+
+ +
+

loader.close (method)

+

Close the loader as "filelike.close" does

+

Signature

+

() -> None

+
+
+

loader.open (method)

+

Open the loader as "io.open" does

+
+
+

loader.read_byte_stream (method)

+

Read bytes stream

+

Signature

+

() -> types.IByteStream

+
+
+

loader.read_byte_stream_analyze (method)

+

Detect metadata using sample

+

Signature

+

(buffer: bytes)

+

Parameters

+
    +
  • + buffer + (bytes): byte buffer
  • +
+
+
+

loader.read_byte_stream_buffer (method)

+

Buffer byte stream

+

Signature

+

(byte_stream: types.IByteStream)

+

Parameters

+
    +
  • + byte_stream + (types.IByteStream): resource byte stream
  • +
+
+
+

loader.read_byte_stream_create (method)

+

Create bytes stream

+

Signature

+

() -> types.IByteStream

+
+
+

loader.read_byte_stream_decompress (method)

+

Decompress byte stream

+

Signature

+

(byte_stream: types.IByteStream) -> types.IByteStream

+

Parameters

+
    +
  • + byte_stream + (types.IByteStream): resource byte stream
  • +
+
+
+

loader.read_byte_stream_process (method)

+

Process byte stream

+

Signature

+

(byte_stream: types.IByteStream) -> ByteStreamWithStatsHandling

+

Parameters

+
    +
  • + byte_stream + (types.IByteStream): resource byte stream
  • +
+
+
+

loader.read_text_stream (method)

+

Read text stream

+
+
+

loader.write_byte_stream (method)

+

Write from a temporary file

+

Signature

+

(path: str) -> Any

+

Parameters

+
    +
  • + path + (str): path to a temporary file
  • +
+
+
+

loader.write_byte_stream_create (method)

+

Create byte stream for writing

+

Signature

+

(path: str) -> types.IByteStream

+

Parameters

+
    +
  • + path + (str): path to a temporary file
  • +
+
+
+

loader.write_byte_stream_save (method)

+

Store byte stream

+

Signature

+

(byte_stream: types.IByteStream) -> Any

+

Parameters

+
    +
  • + byte_stream + (types.IByteStream)
  • +
+
+ + +
+

Mapper (class)

+

+
+ + + + + +
+

Parser (class)

+

Parser representation

+

Signature

+

(resource: Resource)

+

Parameters

+
    +
  • + resource + (Resource): resource
  • +
+
+ +
+

parser.requires_loader (property)

+

+ Specifies if parser requires the loader to load the + data. +

+

Signature

+

ClassVar[bool]

+
+
+

parser.supported_types (property)

+

+ Data types supported by the parser. +

+

Signature

+

ClassVar[List[str]]

+
+ +
+

parser.cell_stream (property)

+

+

Signature

+

types.ICellStream

+
+
+

parser.closed (property)

+

Whether the parser is closed

+

Signature

+

bool

+
+
+

parser.loader (property)

+

+

Signature

+

Loader

+
+
+

parser.resource (property)

+

+

Signature

+

Resource

+
+
+

parser.sample (property)

+

+

Signature

+

types.ISample

+
+ +
+

parser.close (method)

+

Close the parser as "filelike.close" does

+

Signature

+

() -> None

+
+
+

parser.open (method)

+

Open the parser as "io.open" does

+
+
+

parser.read_cell_stream (method)

+

Read list stream

+

Signature

+

() -> types.ICellStream

+
+
+

parser.read_cell_stream_create (method)

+

Create list stream from loader

+

Signature

+

() -> types.ICellStream

+
+
+

parser.read_cell_stream_handle_errors (method)

+

Wrap list stream into error handler

+

Signature

+

(cell_stream: types.ICellStream) -> CellStreamWithErrorHandling

+

Parameters

+
    +
  • + cell_stream + (types.ICellStream)
  • +
+
+
+

parser.read_loader (method)

+

Create and open loader

+

Signature

+

() -> Optional[Loader]

+
+
+

parser.write_row_stream (method)

+

Write row stream from the source resource

+

Signature

+

(source: TableResource) -> Any

+

Parameters

+
    +
  • + source + (TableResource): source resource
  • +
+
+ + +
+

Plugin (class)

+

Plugin representation + +It's an interface for writing Frictionless plugins. +You can implement one or more methods to hook into the Frictionless system.

+
+ + + +
+

plugin.create_adapter (method)

+

Create adapter

+

Signature

+

(source: Any, *, control: Optional[Control] = None, basepath: Optional[str] = None, packagify: bool = False) -> Optional[Adapter]

+

Parameters

+
    +
  • + source + (Any): source
  • +
  • + control + (Optional[Control]): control
  • +
  • + basepath + (Optional[str])
  • +
  • + packagify + (bool)
  • +
+
+
+

plugin.create_loader (method)

+

Create loader

+

Signature

+

(resource: Resource) -> Optional[Loader]

+

Parameters

+
    +
  • + resource + (Resource): loader resource
  • +
+
+
+

plugin.create_parser (method)

+

Create parser

+

Signature

+

(resource: Resource) -> Optional[Parser]

+

Parameters

+
    +
  • + resource + (Resource): parser resource
  • +
+
+
+

plugin.detect_field_candidates (method)

+

Detect field candidates

+

Signature

+

(candidates: List[dict[str, Any]]) -> None

+

Parameters

+
    +
  • + candidates + (List[dict[str, Any]])
  • +
+
+
+

plugin.detect_resource (method)

+

Hook into resource detection

+

Signature

+

(resource: Resource) -> None

+

Parameters

+
    +
  • + resource + (Resource): resource
  • +
+
+ + +
+

System (class)

+

System representation + +This class provides the ability to make system-level Frictionless calls. +It's available as the `frictionless.system` singleton.

+

Signature

+

+
+ +
+

system.supported_hooks (property)

+

+ The list of plugin hook method names supported by the system. +

+

Signature

+

ClassVar[List[str]]

+
+
+

system.trusted (property)

+

+ A flag that indicates if resource, path or package is trusted. +

+

Signature

+

bool

+
+
+

system.onerror (property)

+

+ Type of action to take on Error such as "warn", "raise" or "ignore". +

+

Signature

+

types.IOnerror

+
+
+

system.standards (property)

+

+ By setting this value, a user can use features of a specific standards version. + The default value is v2. +

+

Signature

+

types.IStandards

+
+ +
+

system.http_session (property)

+

Return an HTTP session + +This method will return a new session or the session +from the `system.use_http_session` context manager

+
+ +
+

system.create_adapter (method)

+

Create adapter

+

Signature

+

(source: Any, *, control: Optional[Control] = None, basepath: Optional[str] = None, packagify: bool = False) -> Optional[Adapter]

+

Parameters

+
    +
  • + source + (Any)
  • +
  • + control + (Optional[Control])
  • +
  • + basepath + (Optional[str])
  • +
  • + packagify + (bool)
  • +
+
+
+

system.create_loader (method)

+

Create loader

+

Signature

+

(resource: Resource) -> Loader

+

Parameters

+
    +
  • + resource + (Resource): loader resource
  • +
+
+
+

system.create_parser (method)

+

Create parser

+

Signature

+

(resource: Resource) -> Parser

+

Parameters

+
    +
  • + resource + (Resource): parser resource
  • +
+
+
+

system.deregister (method)

+

Deregister a plugin

+

Signature

+

(name: str)

+

Parameters

+
    +
  • + name + (str): plugin name
  • +
+
+
+

system.detect_field_candidates (method)

+

Create candidates

+

Signature

+

() -> List[dict[str, Any]]

+
+
+

system.detect_resource (method)

+

Hook into resource detection

+

Signature

+

(resource: Resource) -> None

+

Parameters

+
    +
  • + resource + (Resource): resource
  • +
+
+
+

system.register (method)

+

Register a plugin

+

Signature

+

(name: str, plugin: Plugin)

+

Parameters

+
    +
  • + name + (str): plugin name
  • +
  • + plugin + (Plugin): plugin to register
  • +
+
+ + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/basic-examples.html b/docs/basic-examples.html new file mode 100644 index 0000000000..b3cbccfe05 --- /dev/null +++ b/docs/basic-examples.html @@ -0,0 +1,4141 @@ + + + + + + + + +Basic Examples | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Basic Examples

+

Let's start with an example dataset. We will look at a few raw data files that have recently been collected by an anthropologist. The anthropologist wants to publish this data in an open repository so her colleagues can also use it. Before publishing, she wants to add metadata and check the data for errors. We are here to help, so let's start by exploring the data. We can see that the quality of the data is far from perfect; in fact, the first row contains comments from the anthropologist! To be able to use this data, we need to clean it up a bit.

+
+

Download countries.csv to reproduce the examples (right-click and "Save link as").

+
+ +
+
+
cat countries.csv
+
+ +
# clean this data!
+id,neighbor_id,name,population
+1,Ireland,Britain,67
+2,3,France,n/a,find the population
+3,22,Germany,83
+4,,Italy,60
+5
+ +
+
+
with open('countries.csv') as file:
+    print(file.read())
+
+ +
# clean this data!
+id,neighbor_id,name,population
+1,Ireland,Britain,67
+2,3,France,n/a,find the population
+3,22,Germany,83
+4,,Italy,60
+5
+ +
+

As we can see, this is data containing information about European countries and their populations. Also, it looks like there are two fields having a relationship based on a country's identifier: neighbor_id is a Foreign Key to id.

+

Describing Data

+

First of all, we're going to describe our dataset. Frictionless uses the powerful Frictionless Data Specifications. They are very handy to describe:

+ +

Let's describe the countries table:

+ +
+
+
frictionless describe countries.csv # optionally add --stats to get statistics
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+               dataset
+┏━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━┓
+┃ name      ┃ type  ┃ path          ┃
+┡━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━┩
+│ countries │ table │ countries.csv │
+└───────────┴───────┴───────────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+                   countries
+┏━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━┓
+┃ id      ┃ neighbor_id ┃ name   ┃ population ┃
+┡━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━┩
+│ integer │ string      │ string │ string     │
+└─────────┴─────────────┴────────┴────────────┘
+ +
+
+
from pprint import pprint
+from frictionless import describe
+
+resource = describe('countries.csv')
+pprint(resource)
+
+ +
{'name': 'countries',
+ 'type': 'table',
+ 'path': 'countries.csv',
+ 'scheme': 'file',
+ 'format': 'csv',
+ 'mediatype': 'text/csv',
+ 'encoding': 'utf-8',
+ 'dialect': {'headerRows': [2]},
+ 'schema': {'fields': [{'name': 'id', 'type': 'integer'},
+                       {'name': 'neighbor_id', 'type': 'string'},
+                       {'name': 'name', 'type': 'string'},
+                       {'name': 'population', 'type': 'string'}]}}
+ +
+

As we can see, Frictionless was smart enough to understand that the first row contains a comment. It's good, but we still have a few problems:

+ +

Let's update our metadata and save it to disk:

+
+

Open this file in your favorite editor and update it as shown below

+
+ +
+
+
frictionless describe countries.csv --yaml > countries.resource.yaml
+editor countries.resource.yaml
+
+ +
+
+
from frictionless import Detector, describe
+
+detector = Detector(field_missing_values=["", "n/a"])
+resource = describe("countries.csv", detector=detector)
+resource.schema.set_field_type("neighbor_id", "integer")
+resource.schema.foreign_keys.append(
+    {"fields": ["neighbor_id"], "reference": {"resource": "", "fields": ["id"]}}
+)
+resource.to_yaml("countries.resource.yaml")
+
+ +
+

Let's see what we have created:

+ +
+
+
cat countries.resource.yaml
+
+ +
name: countries
+type: table
+path: countries.csv
+scheme: file
+format: csv
+mediatype: text/csv
+encoding: utf-8
+dialect:
+  headerRows:
+    - 2
+schema:
+  fields:
+    - name: id
+      type: integer
+    - name: neighbor_id
+      type: integer
+    - name: name
+      type: string
+    - name: population
+      type: integer
+  missingValues:
+    - ''
+    - n/a
+  foreignKeys:
+    - fields:
+        - neighbor_id
+      reference:
+        resource: ''
+        fields:
+          - id
+ +
+
+
with open('countries.resource.yaml') as file:
+    print(file.read())
+
+ +
name: countries
+type: table
+path: countries.csv
+scheme: file
+format: csv
+mediatype: text/csv
+encoding: utf-8
+dialect:
+  headerRows:
+    - 2
+schema:
+  fields:
+    - name: id
+      type: integer
+    - name: neighbor_id
+      type: integer
+    - name: name
+      type: string
+    - name: population
+      type: integer
+  missingValues:
+    - ''
+    - n/a
+  foreignKeys:
+    - fields:
+        - neighbor_id
+      reference:
+        resource: ''
+        fields:
+          - id
+ +
+

It has the same metadata as we saw above, but also includes our edits related to missing values and data types. We didn't have to change all the wrong data types manually, because providing the proper missing values fixed them automatically. Now we have a resource descriptor. In the next section, we will show why metadata matters and how to use it.

+

Extracting Data

+

It's time to try extracting our data as a table. As a first naive attempt, we will ignore the metadata we saved in the previous step:

+ +
+
+
frictionless extract countries.csv
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+               dataset
+┏━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━┓
+┃ name      ┃ type  ┃ path          ┃
+┡━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━┩
+│ countries │ table │ countries.csv │
+└───────────┴───────┴───────────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+                 countries
+┏━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━┓
+┃ id ┃ neighbor_id ┃ name    ┃ population ┃
+┡━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━┩
+│ 1  │ Ireland     │ Britain │ 67         │
+│ 2  │ 3           │ France  │ n/a        │
+│ 3  │ 22          │ Germany │ 83         │
+│ 4  │ None        │ Italy   │ 60         │
+│ 5  │ None        │ None    │ None       │
+└────┴─────────────┴─────────┴────────────┘
+ +
+
+
from pprint import pprint
+from frictionless import extract
+
+rows = extract('countries.csv')
+pprint(rows)
+
+ +
{'countries': [{'id': 1,
+                'name': 'Britain',
+                'neighbor_id': 'Ireland',
+                'population': '67'},
+               {'id': 2,
+                'name': 'France',
+                'neighbor_id': '3',
+                'population': 'n/a'},
+               {'id': 3,
+                'name': 'Germany',
+                'neighbor_id': '22',
+                'population': '83'},
+               {'id': 4,
+                'name': 'Italy',
+                'neighbor_id': None,
+                'population': '60'},
+               {'id': 5,
+                'name': None,
+                'neighbor_id': None,
+                'population': None}]}
+ +
+

Actually, it doesn't look terrible, but in reality, data like this is not quite useful:

+ +

The output of extract is in the 'utf-8' encoding. Let's use the metadata we saved to extract the data with the help of the Frictionless Data specifications:

+ +
+
+
frictionless extract countries.resource.yaml
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+               dataset
+┏━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━┓
+┃ name      ┃ type  ┃ path          ┃
+┡━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━┩
+│ countries │ table │ countries.csv │
+└───────────┴───────┴───────────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+                 countries
+┏━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━┓
+┃ id ┃ neighbor_id ┃ name    ┃ population ┃
+┡━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━┩
+│ 1  │ None        │ Britain │ 67         │
+│ 2  │ 3           │ France  │ None       │
+│ 3  │ 22          │ Germany │ 83         │
+│ 4  │ None        │ Italy   │ 60         │
+│ 5  │ None        │ None    │ None       │
+└────┴─────────────┴─────────┴────────────┘
+ +
+
+
from pprint import pprint
+from frictionless import extract
+
+rows = extract('countries.resource.yaml')
+pprint(rows)
+
+ +
{'countries': [{'id': 1,
+                'name': 'Britain',
+                'neighbor_id': None,
+                'population': 67},
+               {'id': 2,
+                'name': 'France',
+                'neighbor_id': 3,
+                'population': None},
+               {'id': 3,
+                'name': 'Germany',
+                'neighbor_id': 22,
+                'population': 83},
+               {'id': 4,
+                'name': 'Italy',
+                'neighbor_id': None,
+                'population': 60},
+               {'id': 5,
+                'name': None,
+                'neighbor_id': None,
+                'population': None}]}
+ +
+

It's now much better! Numeric fields are really numeric, and there are no more textual missing value markers. We can't see it in the command line, but in Python the missing values are now None, and the data can be, for example, exported to SQL. It's still not ready to be published, though. In the next section, we will validate it!
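
We can double-check this in plain Python; here is a minimal sketch using the extract function from above:

from frictionless import extract

rows = extract('countries.resource.yaml')
for row in rows['countries']:
    # values are typed now: integers where present, None for missing values
    assert row['population'] is None or isinstance(row['population'], int)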

+

Validating Data

+

Data validation with Frictionless is as easy as describing or extracting data:

+ +
+
+
frictionless validate countries.csv
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+                    dataset
+┏━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━┓
+┃ name      ┃ type  ┃ path          ┃ status  ┃
+┡━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━┩
+│ countries │ table │ countries.csv │ INVALID │
+└───────────┴───────┴───────────────┴─────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+                                   countries
+┏━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
+┃ Row ┃ Field ┃ Type         ┃ Message                                         ┃
+┡━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
+│ 4   │ 5     │ extra-cell   │ Row at position "4" has an extra value in field │
+│     │       │              │ at position "5"                                 │
+│ 7   │ 2     │ missing-cell │ Row at position "7" has a missing cell in field │
+│     │       │              │ "neighbor_id" at position "2"                   │
+│ 7   │ 3     │ missing-cell │ Row at position "7" has a missing cell in field │
+│     │       │              │ "name" at position "3"                          │
+│ 7   │ 4     │ missing-cell │ Row at position "7" has a missing cell in field │
+│     │       │              │ "population" at position "4"                    │
+└─────┴───────┴──────────────┴─────────────────────────────────────────────────┘
+ +
+
+
from pprint import pprint
+from frictionless import validate
+
+report = validate('countries.csv')
+pprint(report.flatten(["rowNumber", "fieldNumber", "type"]))
+
+ +
[[4, 5, 'extra-cell'],
+ [7, 2, 'missing-cell'],
+ [7, 3, 'missing-cell'],
+ [7, 4, 'missing-cell']]
+ +
+

Ah, we saw that coming: the data is not valid; there are some missing and extra cells. But wait a minute: in the first step, we created a metadata file with more information about our table. Let's use it.

+ +
+
+
frictionless validate countries.resource.yaml
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+                    dataset
+┏━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━┓
+┃ name      ┃ type  ┃ path          ┃ status  ┃
+┡━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━┩
+│ countries │ table │ countries.csv │ INVALID │
+└───────────┴───────┴───────────────┴─────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+                                   countries
+┏━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
+┃ Row ┃ Field ┃ Type         ┃ Message                                         ┃
+┡━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
+│ 3   │ 2     │ type-error   │ Type error in the cell "Ireland" in row "3" and │
+│     │       │              │ field "neighbor_id" at position "2": type is    │
+│     │       │              │ "integer/default"                               │
+│ 4   │ 5     │ extra-cell   │ Row at position "4" has an extra value in field │
+│     │       │              │ at position "5"                                 │
+│ 5   │ None  │ foreign-key  │ Row at position "5" violates the foreign key:   │
+│     │       │              │ for "neighbor_id": values "22" not found in the │
+│     │       │              │ lookup table "" as "id"                         │
+│ 7   │ 2     │ missing-cell │ Row at position "7" has a missing cell in field │
+│     │       │              │ "neighbor_id" at position "2"                   │
+│ 7   │ 3     │ missing-cell │ Row at position "7" has a missing cell in field │
+│     │       │              │ "name" at position "3"                          │
+│ 7   │ 4     │ missing-cell │ Row at position "7" has a missing cell in field │
+│     │       │              │ "population" at position "4"                    │
+└─────┴───────┴──────────────┴─────────────────────────────────────────────────┘
+ +
+
+
from pprint import pprint
+from frictionless import validate
+
+report = validate('countries.resource.yaml')
+pprint(report.flatten(["rowNumber", "fieldNumber", "type"]))
+
+ +
[[3, 2, 'type-error'],
+ [4, 5, 'extra-cell'],
+ [5, None, 'foreign-key'],
+ [7, 2, 'missing-cell'],
+ [7, 3, 'missing-cell'],
+ [7, 4, 'missing-cell']]
+ +
+

Now it's even worse, but when it comes to data validation errors, the more, the better, actually. Thanks to the metadata, we were able to reveal some critical problems: a type error (the value "Ireland" in the integer neighbor_id field) and a foreign key violation (the neighbor_id value "22" has no matching id), in addition to the structural errors we saw before.

In the next section, we will clean up the data.

+

Transforming Data

+

We will use metadata to fix all the data type problems automatically. Only two things need to be handled manually: replacing the invalid neighbor_id value "22" with "2", and replacing the textual population value "n/a" with "67" (the completely blank row is dropped by a row-filter step):
+
+
cat > countries.pipeline.yaml <<EOF
+steps:
+  - type: cell-replace
+    fieldName: neighbor_id
+    pattern: '22'
+    replace: '2'
+  - type: cell-replace
+    fieldName: population
+    pattern: 'n/a'
+    replace: '67'
+  - type: row-filter
+    formula: population
+  - type: field-update
+    name: neighbor_id
+    descriptor:
+      type: integer
+  - type: field-update
+    name: population
+    descriptor:
+      type: integer
+  - type: table-normalize
+  - type: table-write
+    path: countries-cleaned.csv
+EOF
+frictionless transform countries.csv --pipeline countries.pipeline.yaml
+
+ +
## Schema
+
++-------------+---------+------------+
+| name        | type    | required   |
++=============+=========+============+
+| id          | integer |            |
++-------------+---------+------------+
+| neighbor_id | integer |            |
++-------------+---------+------------+
+| name        | string  |            |
++-------------+---------+------------+
+| population  | integer |            |
++-------------+---------+------------+
+
+## Table
+
++----+-------------+---------+------------+
+| id | neighbor_id | name    | population |
++====+=============+=========+============+
+|  1 | None        | Britain |         67 |
++----+-------------+---------+------------+
+|  2 |           3 | France  |         67 |
++----+-------------+---------+------------+
+|  3 |           2 | Germany |         83 |
++----+-------------+---------+------------+
+|  4 | None        | Italy   |         60 |
++----+-------------+---------+------------+
+ +
+
+
from pprint import pprint
+from frictionless import Pipeline, Resource, steps
+
+pipeline = Pipeline(steps=[
+    steps.cell_replace(field_name='neighbor_id', pattern='22', replace='2'),
+    steps.cell_replace(field_name='population', pattern='n/a', replace='67'),
+    steps.row_filter(formula='population'),
+    steps.field_update(name='neighbor_id', descriptor={"type": "integer"}),
+    steps.field_update(name='population', descriptor={"type": "integer"}),
+    steps.table_normalize(),
+    steps.table_write(path="countries-cleaned.csv"),
+])
+
+source = Resource('countries.csv')
+target = source.transform(pipeline)
+pprint(target.read_rows())
+
+ +
[{'id': 1, 'neighbor_id': None, 'name': 'Britain', 'population': 67},
+ {'id': 2, 'neighbor_id': 3, 'name': 'France', 'population': 67},
+ {'id': 3, 'neighbor_id': 2, 'name': 'Germany', 'population': 83},
+ {'id': 4, 'neighbor_id': None, 'name': 'Italy', 'population': 60}]
+ +
+

Finally, we've got the cleaned version of our data, which can be exported to a database or published. We have used CSV as the output format, but we could have used Excel, JSON, SQL, and others.
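
Switching the output format is just a matter of the target path in the table-write step. A minimal sketch of the same pipeline writing an Excel file instead (assuming the Excel plugin, e.g. frictionless[excel], is installed):

from frictionless import Pipeline, Resource, steps

pipeline = Pipeline(steps=[
    steps.cell_replace(field_name='neighbor_id', pattern='22', replace='2'),
    steps.cell_replace(field_name='population', pattern='n/a', replace='67'),
    steps.row_filter(formula='population'),
    steps.table_normalize(),
    steps.table_write(path="countries-cleaned.xlsx"),  # or .json, .ods, etc.
])
Resource('countries.csv').transform(pipeline)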

+ +
+
+
cat countries-cleaned.csv
+
+ +
id,neighbor_id,name,population
+1,,Britain,67
+2,3,France,67
+3,2,Germany,83
+4,,Italy,60
+ +
+
+
with open('countries-cleaned.csv') as file:
+    print(file.read())
+
+ +
id,neighbor_id,name,population
+1,,Britain,67
+2,3,France,67
+3,2,Germany,83
+4,,Italy,60
+ +
+

Basically, that's it: now we have a valid data file and a corresponding metadata file. They can be shared with other people or stored without fear of type errors or other problems that make research data irreproducible.

+ +
+
+
ls countries-cleaned.*
+
+ +
countries-cleaned.csv
+ +
+
+
import os
+
+files = [f for f in os.listdir('.') if os.path.isfile(f) and f.startswith('countries-cleaned.')]
+print(files)
+
+ +
['countries-cleaned.csv']
+ +
+

In the next articles, we will explore more advanced Frictionless functionality.


Baseline Check

+

Overview

+

The Baseline Check is always enabled. It makes various small checks that reveal a great number of tabular errors. You can create an empty Checklist to see the baseline check's scope:

+
+

Download capital-invalid.csv to reproduce the examples (right-click and "Save link as").

+
+ +
+
+
from pprint import pprint
+from frictionless import Checklist, validate
+
+checklist = Checklist()
+pprint(checklist.scope)
+report = validate('capital-invalid.csv')  # we don't pass the checklist as the empty one is default
+pprint(report.flatten(['type', 'message']))
+
+ +
['hash-count',
+ 'byte-count',
+ 'field-count',
+ 'row-count',
+ 'blank-header',
+ 'extra-label',
+ 'missing-label',
+ 'blank-label',
+ 'duplicate-label',
+ 'incorrect-label',
+ 'blank-row',
+ 'primary-key',
+ 'foreign-key',
+ 'extra-cell',
+ 'missing-cell',
+ 'type-error',
+ 'constraint-error',
+ 'unique-error']
+[['duplicate-label',
+  'Label "name" in the header at position "3" is duplicated to a label: at '
+  'position "2"'],
+ ['missing-cell',
+  'Row at position "10" has a missing cell in field "name2" at position "3"'],
+ ['blank-row', 'Row at position "11" is completely blank'],
+ ['type-error',
+  'Type error in the cell "x" in row "12" and field "id" at position "1": type '
+  'is "integer/default"'],
+ ['extra-cell',
+  'Row at position "12" has an extra value in field at position "4"']]
+ +
+

The Baseline Check is incorporated into base Frictionless classes such as Resource, Header, and Row. There is no exact order in which those errors are revealed, as the validation is highly optimized. One should consider the Baseline Check as a single unit of validation.

+
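
Note that the baseline checks also run when you pass a checklist with extra checks: the scope above is always included. Here is a minimal sketch reusing the same capital-invalid.csv file (the trivial row-constraint formula is just a placeholder):

from frictionless import Checklist, checks, validate

checklist = Checklist(checks=[checks.row_constraint(formula="1 == 1")])
report = validate('capital-invalid.csv', checklist=checklist)
print(report.valid)  # False: the baseline errors are still reported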

Reference

+
+ + +
+
+ +

checks.baseline (class)

+ +
+
+ + +
+

checks.baseline (class)

+

Check a table for basic errors. This check is enabled by default for any `validate` function run.

+

Signature

+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None) -> None

+

Parameters

+
    +
  • + name + (Optional[str])
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
+

Cell Checks

+

ASCII Value

+

If your data should contain only ASCII characters, this check notifies you if any non-ASCII characters are found during validation. Here is how we can use this check.

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import validate, checks
+
+source = [["s.no", "code"], [1, "ssµ"]]
+report = validate(source, checks=[checks.ascii_value()])
+pprint(report.flatten(["type", "message"]))
+
+ +
[['ascii-value',
+  'The cell ssµ in row at position 2 and field code at position 2 has an '
+  'error: the cell contains non-ascii characters']]
+ +
+

Reference

+
+ + +
+
+ +

checks.ascii_value (class)

+ +
+
+ + +
+

checks.ascii_value (class)

+

Check whether all the string characters in the data are ASCII. This check can be enabled using the `checks` parameter for the `validate` function.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None) -> None

+
Parameters
+
    +
  • + name + (Optional[str])
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
+
+ + + + + +
+
+

Deviated Cell

+

This check identifies cells that deviate from the normal ones. To flag a deviated cell, the check compares the length in characters of each cell against a threshold value. The threshold is either 5000 or a value calculated using Python's built-in statistics module: the average plus three standard deviations. The exact algorithm can be found here. For example:

+

Example

+
+

Download issue-1066.csv to reproduce the examples (right-click and "Save link as").

+
+ +
+
+
from pprint import pprint
+from frictionless import validate, checks
+
+report = validate("issue-1066.csv", checks=[checks.deviated_cell()])
+pprint(report.flatten(["type", "message"]))
+
+ +
[['deviated-cell',
+  'There is a possible error because the cell is deviated: cell at row "35" '
+  'and field "Gestore" has deviated size']]
+ +
+

Reference

+
+ + +
+
+ +

checks.deviated_cell (class)

+ +
+
+ + +
+

checks.deviated_cell (class)

+

Check if the cell size is deviated

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, interval: int = 3, ignore_fields: List[str] = NOTHING) -> None

+
Parameters
+
    +
  • + name + (Optional[str])
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + interval + (int)
  • +
  • + ignore_fields + (List[str])
  • +
+
+ +
+

checks.deviated_cell.interval (property)

+

Interval specifies the number of standard deviations away from the center. The median is used to find the center of the data. The default value is 3.

+
Signature
+

int

+
+
+

checks.deviated_cell.ignore_fields (property)

+

List of data columns to be skipped by the check. The check will not be applied to any of the columns listed here. The default value is [].

+
Signature
+

List[str]

+
+ + + + +
+
+

Deviated Value

+

This check uses Python's built-in statistics module to check a field's data for deviations. By default, deviated values are outside of the average ± three standard deviations. Take a look at the API Reference for more details about available options and default values. The exact algorithm can be found here. For example:

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import validate, checks
+
+source = [["temperature"], [1], [-2], [7], [0], [1], [2], [5], [-4], [1000], [8], [3]]
+report = validate(source, checks=[checks.deviated_value(field_name="temperature")])
+pprint(report.flatten(["type", "message"]))
+
+ +
[['deviated-value',
+  'There is a possible error because the value is deviated: value "1000" in '
+  'row at position "10" and field "temperature" is deviated "[-809.88, '
+  '995.52]"']]
+ +
+
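
The interval reported above can be reproduced with the statistics module directly. A rough sketch of the calculation (the check's own implementation may differ in details):

import statistics

values = [1, -2, 7, 0, 1, 2, 5, -4, 1000, 8, 3]
average = statistics.mean(values)
stdev = statistics.stdev(values)
print([round(average - 3 * stdev, 2), round(average + 3 * stdev, 2)])
# [-809.88, 995.52]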

Reference

+
+ + +
+
+ +

checks.deviated_value (class)

+ +
+
+ + +
+

checks.deviated_value (class)

+

Check for deviated values in a field.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, field_name: str, interval: int = 3, average: str = mean) -> None

+
Parameters
+
    +
  • + name + (Optional[str])
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + field_name + (str)
  • +
  • + interval + (int)
  • +
  • + average + (str)
  • +
+
+ +
+

checks.deviated_value.field_name (property)

+

+ Name of the field to which the check will be applied. + Check will not be applied to fields other than this. +

+
Signature
+

str

+
+
+

checks.deviated_value.interval (property)

+

Interval specifies the number of standard deviations away from the mean. The default value is 3.

+
Signature
+

int

+
+
+

checks.deviated_value.average (property)

+

It specifies the preferred method to calculate the average of the data. The default value is "mean". Supported average calculation methods are "mean", "median", and "mode".

+
Signature
+

str

+
+ + + + +
+
+

Forbidden Value

+

This check ensures that a field doesn't contain any forbidden (denylisted) values.

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import validate, checks
+
+source = b'header\nvalue1\nvalue2'
+report = validate(source, format='csv', checks=[checks.forbidden_value(field_name='header', values=['value2'])])
+pprint(report.flatten(['type', 'message']))
+
+ +
[['forbidden-value',
+  'The cell value2 in row at position 3 and field header at position 1 has an '
+  'error: forbidden values are "[\'value2\']"']]
+ +
+

Reference

+
+ + +
+
+ +

checks.forbidden_value (class)

+ +
+
+ + +
+

checks.forbidden_value (class)

+

Check for forbidden values in a field.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, field_name: str, values: List[Any]) -> None

+
Parameters
+
    +
  • + name + (Optional[str])
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + field_name + (str)
  • +
  • + values + (List[Any])
  • +
+
+ +
+

checks.forbidden_value.field_name (property)

+

+ The name of the field to apply the check. Check will not be applied to + other fields. +

+
Signature
+

str

+
+
+

checks.forbidden_value.values (property)

+

+ Specify the forbidden values to check for, in the field specified by + "field_name". +

+
Signature
+

List[Any]

+
+ + + + +
+
+

Sequential Value

+

This check gives us an opportunity to validate sequential fields like primary keys or other similar data. The sequence doesn't need to start from 0 or 1; we just provide a field name.

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import validate, checks
+
+source = b'header\n2\n3\n5'
+report = validate(source, format='csv', checks=[checks.sequential_value(field_name='header')])
+pprint(report.flatten(['type', 'message']))
+
+ +
[['sequential-value',
+  'The cell 5 in row at position 4 and field header at position 1 has an '
+  'error: the value is not sequential']]
+ +
+

Reference

+
+ + +
+
+ +

checks.sequential_value (class)

+ +
+
+ + +
+

checks.sequential_value (class)

+

Check that a column has sequential values.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, field_name: str) -> None

+
Parameters
+
    +
  • + name + (Optional[str])
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + field_name + (str)
  • +
+
+ +
+

checks.sequential_value.field_name (property)

+

+ The name of the field to apply the check. Check will not be + applied to other fields. +

+
Signature
+

str

+
+ + + + +
+
+

Truncated Value

+

Sometimes during data export from a database or other storage, data values get truncated. This check tries to detect such truncation by looking for common indicators, such as strings of exactly 255 characters or integer values at common type limits like 32767 and 2147483647. Let's explore some of these truncation indicators.

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import validate, checks
+
+source = [["int", "str"], ["a" * 255, 32767], ["good", 2147483647]]
+report = validate(source, checks=[checks.truncated_value()])
+pprint(report.flatten(["type", "message"]))
+
+ +
[['truncated-value',
+  'The cell '
+  'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa '
+  'in row at position 2 and field int at position 1 has an error: value  is '
+  'probably truncated'],
+ ['truncated-value',
+  'The cell 32767 in row at position 2 and field str at position 2 has an '
+  'error: value  is probably truncated'],
+ ['truncated-value',
+  'The cell 2147483647 in row at position 3 and field str at position 2 has an '
+  'error: value  is probably truncated']]
+ +
+

Reference

+
+ + +
+
+ +

checks.truncated_value (class)

+ +
+
+ + +
+

checks.truncated_value (class)

+

Check for possible truncated values. This check can be enabled using the `checks` parameter for the `validate` function.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None) -> None

+
Parameters
+
    +
  • + name + (Optional[str])
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
+

Row Checks

+

Duplicate Row

+

This check looks for duplicate rows. Take into account that checking for duplicate rows can lead to high memory consumption on big files. Here is an example.

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import validate, checks
+
+source = b"header\nvalue\nvalue"
+report = validate(source, format="csv", checks=[checks.duplicate_row()])
+pprint(report.flatten(["type", "message"]))
+
+ +
[['duplicate-row',
+  'Row at position 3 is duplicated: the same as row at position "2"']]
+ +
+

Reference

+
+ + +
+
+ +

checks.duplicate_row (class)

+ +
+
+ + +
+

checks.duplicate_row (class)

+

Check for duplicate rows. This check can be enabled using the `checks` parameter for the `validate` function.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None) -> None

+
Parameters
+
    +
  • + name + (Optional[str])
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
+
+ + + + + +
+
+

Row Constraint

+

This check is the most powerful one, as it uses the external simpleeval package, allowing you to evaluate arbitrary Python expressions on data rows. Let's look at an example.

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import validate, checks
+
+source = [
+    ["row", "salary", "bonus"],
+    [2, 1000, 200],
+    [3, 2500, 500],
+    [4, 1300, 500],
+    [5, 5000, 1000],
+]
+report = validate(source, checks=[checks.row_constraint(formula="salary == bonus * 5")])
+pprint(report.flatten(["type", "message"]))
+
+ +
[['row-constraint',
+  'The row at position 4 has an error: the row constraint to conform is '
+  '"salary == bonus * 5"']]
+ +
+

Reference

+
+ + +
+
+ +

checks.row_constraint (class)

+ +
+
+ + +
+

checks.row_constraint (class)

+

Check that every row satisfies a provided Python expression.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, formula: str) -> None

+
Parameters
+
    +
  • + name + (Optional[str])
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + formula + (str)
  • +
+
+ +
+

checks.row_constraint.formula (property)

+

Python expression to apply to all rows. To evaluate the formula, the simpleeval library is used.

+
Signature
+

str

+

Table Checks

+

Table Dimensions

+

This check is used to validate whether your data has the expected dimensions: an exact number of rows, a minimum and maximum number of rows, an exact number of fields, and a minimum and maximum number of fields.

+

Basic Example

+ +
+
+
from pprint import pprint
+from frictionless import validate, checks
+
+source = [
+    ["row", "salary", "bonus"],
+    [2, 1000, 200],
+    [3, 2500, 500],
+    [4, 1300, 500],
+    [5, 5000, 1000],
+]
+report = validate(source, checks=[checks.table_dimensions(num_rows=5)])
+pprint(report.flatten(["type", "message"]))
+
+ +
[['table-dimensions',
+  'The data source does not have the required dimensions: number of rows is 4, '
+  'the required is 5']]
+ +
+

Multiple Limits

+

You can also give multiple limits at the same time:

+ +
+
+
from pprint import pprint
+from frictionless import validate, checks
+
+source = [
+    ["row", "salary", "bonus"],
+    [2, 1000, 200],
+    [3, 2500, 500],
+    [4, 1300, 500],
+    [5, 5000, 1000],
+]
+report = validate(source, checks=[checks.table_dimensions(num_rows=5, num_fields=4)])
+pprint(report.flatten(["type", "message"]))
+
+ +
[['table-dimensions',
+  'The data source does not have the required dimensions: number of fields is '
+  '3, the required is 4'],
+ ['table-dimensions',
+  'The data source does not have the required dimensions: number of rows is 4, '
+  'the required is 5']]
+ +
+

Using Declaratively

+

It is possible to use the check declaratively as:

+ +
+
+
from pprint import pprint
+from frictionless import Check, validate, checks
+
+source = [
+    ["row", "salary", "bonus"],
+    [2, 1000, 200],
+    [3, 2500, 500],
+    [4, 1300, 500],
+    [5, 5000, 1000],
+]
+
+check = Check.from_descriptor({"type": "table-dimensions", "minFields": 4, "maxRows": 3})
+report = validate(source, checks=[check])
+pprint(report.flatten(["type", "message"]))
+
+ +
[['table-dimensions',
+  'The data source does not have the required dimensions: number of fields is '
+  '3, the minimum is 4'],
+ ['table-dimensions',
+  'The data source does not have the required dimensions: number of rows is 4, '
+  'the maximum is 3']]
+ +
+

Note that in a descriptor the check arguments num_rows, min_rows, max_rows, num_fields, min_fields, and max_fields must be passed in camelCase, as in the example above: numRows, minRows, maxRows, numFields, minFields, and maxFields.

+
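
A minimal sketch of the naming difference; both forms should describe the same check (the to_descriptor comparison is an illustration, not taken from the reference below):

from frictionless import Check, checks

programmatic = checks.table_dimensions(min_fields=4, max_rows=3)  # snake_case arguments
declarative = Check.from_descriptor({"type": "table-dimensions", "minFields": 4, "maxRows": 3})  # camelCase keys
print(programmatic.to_descriptor() == declarative.to_descriptor())  # expected: True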

Reference

+
+ + +
+
+ +

checks.table_dimensions (class)

+ +
+
+ + +
+

checks.table_dimensions (class)

+

Check for minimum and maximum table dimensions.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, num_rows: Optional[int] = None, min_rows: Optional[int] = None, max_rows: Optional[int] = None, num_fields: Optional[int] = None, min_fields: Optional[int] = None, max_fields: Optional[int] = None) -> None

+
Parameters
+
    +
  • + name + (Optional[str])
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + num_rows + (Optional[int])
  • +
  • + min_rows + (Optional[int])
  • +
  • + max_rows + (Optional[int])
  • +
  • + num_fields + (Optional[int])
  • +
  • + min_fields + (Optional[int])
  • +
  • + max_fields + (Optional[int])
  • +
+
+ +
+

checks.table_dimensions.num_rows (property)

+

Specify the exact number of rows expected in the table. If the actual number of rows differs from num_rows, it will be reported as an error.

+
Signature
+

Optional[int]

+
+
+

checks.table_dimensions.min_rows (property)

+

Specify the minimum number of rows that should be in the table. If the actual number of rows is less than min_rows, it will be reported as an error.

+
Signature
+

Optional[int]

+
+
+

checks.table_dimensions.max_rows (property)

+

Specify the maximum number of rows allowed. If the actual number of rows is more than max_rows, it will be reported as an error.

+
Signature
+

Optional[int]

+
+
+

checks.table_dimensions.num_fields (property)

+

Specify the exact number of fields expected in the table. If the actual number of fields differs from num_fields, it will be reported as an error.

+
Signature
+

Optional[int]

+
+
+

checks.table_dimensions.min_fields (property)

+

Specify the minimum number of fields that should be in the table. If the actual number of fields is less than min_fields, it will be reported as an error.

+
Signature
+

Optional[int]

+
+
+

checks.table_dimensions.max_fields (property)

+

Specify the maximum number of fields expected. If the actual number of fields is more than max_fields, it will be reported as an error.

+
Signature
+

Optional[int]

+

Authors

+
+

This page is powered by contributors-img

+
+

This package is a collective effort made by many great people working on various projects. You can click on the pictures below to see their contribution in detail.

+

frictionless-py

+ + + +

datapackage-py

+ + + +

tableschema-py

+ + + +

tableschema-bigquery-py

+ + + +

tableschema-ckan-datastore-py

+ + + +

tableschema-elasticsearch-py

+ + + +

tableschema-pandas-py

+ + + +

tableschema-sql-py

+ + + +

tableschema-spss-py

+ + + +

tabulator-py


Changelog

+

Only the breaking and most significant changes are described here. The full changelog and documentation for all released versions can be found in the nicely formatted commit history.

+

v5.15

+ +

v5.14

+ +

v5.13

+ +

v5.10

+ +

v5.8

+ +

v5.7

+ +

v5.6

+ +

v5.5

+ +

v5.4

+ +

v5.3

+ +

v5.2

+ +

v5.1

+ +

v5.0

+ +

v5.0.0b19

+ +

v5.0.0b8

+ +

v5.0.0b2

+ +

v5.0.0b1

+ +

v4.40

+ +

v4.39

+ +

v4.38

+ +

v4.37

+ +

v4.36

+ +

v4.35

+ +

v4.34

+ +

v4.33

+ +

v4.32

+ +

v4.31

+ +

v4.29

+ +

v4.28

+ +

v4.27

+ +

v4.26

+ +

v4.25

+ +

v4.24

+ +

v4.23

+ +

v4.22

+ +

v4.21

+ +

v4.20

+ +

v4.19

+ +

v4.18

+ +

v4.17

+ +

v4.16

+ +

v4.15

+ +

v4.14

+ +

v4.13

+ +

v4.12

+ +

v4.11

+ +

v4.10

+ +

v4.9

+ +

v4.8

+ +

v4.7

+ +

v4.6

+ +

v4.5

+ +

v4.4

+ +

v4.3

+ +

v4.2

+ +

v4.1

+ +

v4.0

+ +

v4.0.0a15

+ +

v4.0.0a14

+ +

v4.0.0a13

+ +

v4.0.0a12

+ +

v4.0.0a11

+ +

v4.0.0a10

+ +

v4.0.0a9

+ +

v4.0.0a8

+ +

v4.0.0a7

+ +

v4.0.0a6

+ +

v4.0.0a5

+ +

v4.0.0a4

+ +

v4.0.0a3

+ +

v4.0.0a1

+ +

v3.48

+ +

v3.47

+ +

v3.46

+ +
$ pip install frictionless[aws] # before
+$ pip install frictionless[s3] # after
+
+

v3.45

+ +

v3.44

+ +

v3.43

+ +

v3.42

+ +

v3.41

+ +

v3.40

+ +

v3.39

+ +

v3.38

+ +

v3.37

+ +

v3.36

+ +

v3.35

+ +

v3.34

+ +

Code example:

+
# Before
+# pip install frictionless
+from frictionless import dialects, exceptions
+excel_dialect = dialects.ExcelDialect()
+json_dialect = dialects.JsonDialect()
+exception = exceptions.FrictionlessException()
+
+# After
+# pip install frictionless[excel,json]
+from frictionless import FrictionlessException
+from frictionless.plugins.excel import ExcelDialect
+from frictionless.plugins.json import JsonDialect
+excel_dialect = ExcelDialect()
+json_dialect = JsonDialect()
+exception = FrictionlessException()
+
+

v3.33

+ +

v3.32

+ +

v3.31

+ +

v3.30

+ +

v3.29

+ +

v3.28

+ +

v3.27

+ +

v3.26

+ +

v3.25

+ +

v3.24

+ +

v3.23

+ +

v3.22

+ +

v3.21

+ +

v3.20

+ +

v3.19

+ +

v3.18

+ +

v3.17

+ +

v3.16

+ +

v3.15

+ +

v3.14

+ +

v3.13

+ +

v3.12

+ +

v3.11

+ +

v3.10

+ +

v3.9

+ +

v3.8

+ +

v3.7

+ +

v3.6

+ +

v3.5


Contributing

+

We welcome contributions from anyone! Please read the following guidelines, and feel free to reach out to us if you have questions. Thanks for your interest in helping make Frictionless awesome!

+

Introduction

+

We use GitHub as our code and issue hosting platform. To report a bug or propose a new feature, please open an issue. For pull requests, we ask that you first create an issue and then open a pull request linked to it.

+

Prerequisites

+

To start working on the project:

+ +

Install Python headers if they are missing:

+
sudo apt-get install libpython3.10-dev
+
+

Environment

+

For development orchestration we use Hatch for Python (defined in pyproject.toml), and we use make to run high-level commands (defined in the Makefile):

+
pip3 install hatch
+
+

Before starting with the project, we recommend configuring Hatch. The following line ensures that all the virtual environments are stored in the .python directory in the project root:

+
hatch config set 'dirs.env.virtual' '.python'
+
+

Now you can set up your IDE to use the proper Python path:

+
.python/frictionless/bin/python
+
+

Enter the virtual environment before starting to work; this ensures that all the development dependencies are installed into it:

+
hatch shell
+
+

Using Docker

+

Use the following command to build the container:

+ +
+
+
make docker
+
+ +
+

This should take care of setting up everything. If the container is +built without errors, you can then run commands like make inside the +container to accomplish various tasks (see the next section for details).

+

To make things easier, we can create an alias:

+ +
+
+
alias "frictionless-dev=docker run --rm -v $PWD:/home/frictionless -it frictionless-dev"
+
+ +
+

Then, for example, to run the tests, we can use:

+ +
+
+
frictionless-dev make test
+
+ +
+

Development

+

Codebase

+

Frictionless is a Python 3.8+ framework, and it uses some common Python tools for the development process (we recommend enabling support for these tools in your IDE):

+ +

You also need git to work on the project, and make is recommended.

+

Documentation

+

To contribute to the documentation, please find an article in the docs folder and update its contents. We write our documentation using Livemark. Livemark makes it possible to write examples without writing their output, as the output is generated automatically.

+

It's possible to run this documentation portal locally:

+ +
+
+
livemark start
+
+ +
+

Running tests offline

+

The VCR library records the responses of HTTP requests locally as "cassettes" on its first run. All subsequent calls run against the recorded metadata from the previous HTTP requests, which speeds up the testing process. To record a unit test (as a cassette), we mark it with a decorator:

+
@pytest.mark.vcr
+def test_connect_with_server():
+	pass
+
+

The cassette will be recorded as "test_connect_with_server.yaml". A new call is made when the params change. To skip sensitive data, we can use filters:

+
@pytest.fixture(scope="module")
+def vcr_config():
+    return {"filter_headers": ["authorization"]}
+
+

Regenerating cassettes for CKAN

+ +
CKAN_APIKEY=***************************
+
+

Regenerating cassettes for Zenodo

+

Read

+ +
ZENODO_ACCESS_TOKEN=***************************
+
+

Write

+ +
ZENODO_SANDBOX_ACCESS_TOKEN=***************************
+
+
+
base_url='base_url="https://sandbox.zenodo.org/api/'
+
+

Regenerating cassettes for Github

+ +
GITHUB_NAME=FD
+GITHUB_EMAIL=frictionlessdata@okfn.org
+GITHUB_ACCESS_TOKEN=***************************
+
+

Releasing

+

To release a new version:


The MIT License (MIT)

+

Copyright © 2020 Open Knowledge Foundation

+

Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions:

+

The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software.

+

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE.


Migration

+

Frictionless is a logical continuation of many existing packages created for Frictionless Data, such as datapackage or tableschema. Although most of these packages will be supported going forward, you can migrate to Frictionless, which requires Python 3.8+, as it improves many aspects of working with data and metadata. This document also covers migration from one version of the framework to another.

+

From v4 to v5

+

Since the initial Frictionless Framework release, we have been collecting feedback and analyzing both high-level user needs and bug reports to identify shortcomings and areas that can be improved in the next version of the framework. Read about the new version of the framework and migration details in this blog post:

+ +

From dataflows

+

Frictionless Framework provides the frictionless transform function for data transformation. It can be used to migrate from dataflows or datapackage-pipelines:

+ +
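
As a rough sketch of the mapping (assuming a local table.csv): a dataflows Flow of processors becomes a frictionless Pipeline of transform steps:

from frictionless import Pipeline, Resource, steps

pipeline = Pipeline(steps=[
    steps.table_normalize(),
    # further steps play the role of dataflows processors
])
target = Resource('table.csv').transform(pipeline)
print(target.read_rows())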

From goodtables

+

Frictionless Framework provides the frictionless validate function, which at a high level is exactly the same as goodtables validate. Also, frictionless describe is an improved version of goodtables init. You need to use the frictionless command instead of the goodtables command:

+ +
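
In Python, the change is mostly the import. A minimal sketch, assuming a local table.csv:

# from goodtables import validate  # before
from frictionless import validate  # after

report = validate('table.csv')
print(report.valid)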

From datapackage

+

Frictionless Framework has Package and Resource classes, which are almost the same as the ones datapackage has:

+ +
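
A minimal sketch, assuming a local datapackage.json descriptor:

# from datapackage import Package  # before
from frictionless import Package  # after

package = Package('datapackage.json')
print(package.resource_names)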

From tableschema

+

Frictionless Framework has Schema and Field classes, which are almost the same as the ones tableschema has:

+ +
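
A minimal sketch, assuming a local schema.json descriptor:

# from tableschema import Schema  # before
from frictionless import Schema  # after

schema = Schema.from_descriptor('schema.json')
print(schema.field_names)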

From tabulator

+

Frictionless has a Resource class, which is an equivalent of tabulator's Stream class:
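
A minimal sketch, assuming a local table.csv:

# from tabulator import Stream  # before
# with Stream('table.csv', headers=1) as stream:
#     rows = stream.read()

from frictionless import Resource  # after

with Resource('table.csv') as resource:
    rows = resource.read_rows()
print(rows)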


Convert

+
+ +
+ +

With the convert command you can quickly convert a tabular data file from one format to another (or to the same format with a different dialect):

+

Format Conversion

+

For example, let's convert a CSV file into an Excel file:

+ +
+
+
frictionless convert table.csv table.xlsx
+
+ +
+

Downloading Files

+

The command can be used for downloading files as well. For example, let's cherry-pick one CSV file from a Zenodo dataset:

+ +
+
+
frictionless convert https://zenodo.org/record/3977957 --name aaawrestlers --to-path test.csv
+
+ +
+

Dialect Updates

+

Say we want to change the CSV delimiter:

+ + + +
+
+
frictionless convert table.csv table-copy.csv --csv-delimiter ;
+
+ +

Describe

+
+ +

With the Frictionless describe command you can get the metadata of a file or a dataset.

+

Normal Mode

+

By default, it outputs the metadata visually formatted:

+ +
+
+
frictionless describe tables/*.csv
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+               dataset
+┏━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓
+┃ name   ┃ type  ┃ path              ┃
+┡━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩
+│ chunk1 │ table │ tables/chunk1.csv │
+│ chunk2 │ table │ tables/chunk2.csv │
+└────────┴───────┴───────────────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+       chunk1
+┏━━━━━━━━━┳━━━━━━━━┓
+┃ id      ┃ name   ┃
+┡━━━━━━━━━╇━━━━━━━━┩
+│ integer │ string │
+└─────────┴────────┘
+       chunk2
+┏━━━━━━━━━┳━━━━━━━━┓
+┃ id      ┃ name   ┃
+┡━━━━━━━━━╇━━━━━━━━┩
+│ integer │ string │
+└─────────┴────────┘
+ +
+

YAML/JSON Mode

+

It's possible to output as YAML or JSON, for example:

+ + + +
+
+
frictionless describe tables/*.csv --yaml
+
+ +
resources:
+  - name: chunk1
+    type: table
+    path: tables/chunk1.csv
+    scheme: file
+    format: csv
+    mediatype: text/csv
+    encoding: utf-8
+    schema:
+      fields:
+        - name: id
+          type: integer
+        - name: name
+          type: string
+  - name: chunk2
+    type: table
+    path: tables/chunk2.csv
+    scheme: file
+    format: csv
+    mediatype: text/csv
+    encoding: utf-8
+    schema:
+      fields:
+        - name: id
+          type: integer
+        - name: name
+          type: string
+ +
+

Explore

+
+ +

With the explore command you can open your dataset in VisiData, which is an amazing visual tool for working with tabular data in the console. For example, try "Shift+F" to create data histograms!

+

Installation

+ +
+
+
pip install frictionless[visidata]
+pip install frictionless[visidata,zenodo] # for examples in this tutorial
+
+ +
+

Example

+

For example, let's explore this interesting dataset:

+ +
+
+
frictionless explore https://zenodo.org/record/3977957
+
+ +
+
+ +

Documentation

+

Before entering VisiData, it's highly recommended to read its documentation:

+ +

You can get it in the console as well:

+ + + +
+
+
vd --help
+
+ +
vd(1)                        Quick Reference Guide                       vd(1)
+
+NAME
+     VisiData — a terminal utility for exploring and arranging tabular data
+
+SYNOPSIS
+     vd [options] [input ...]
+     vd [options] --play cmdlog [-w waitsecs] [--batch] [-o output]
+        [field=value]
+     vd [options] [input ...] +toplevel:subsheet:col:row
+
+DESCRIPTION
+     VisiData is an easy-to-use multipurpose tool to explore, clean, edit, and
+     restructure data. Rows can be selected, filtered, and grouped; columns
+     can be rearranged, transformed, and derived via regex or Python expres‐
+     sions; and workflows can be saved, documented, and replayed.
+
+   REPLAY MODE
+     -p, --play=cmdlog       replay a saved cmdlog within the interface
+     -w, --replay-wait=seconds
+                             wait seconds between commands
+     -b, --batch             replay in batch mode (with no interface)
+     -o, --output=file       save final visible sheet to file as .tsv
+     --replay-movement       toggle --play to move cursor cell-by-cell
+     field=value             replace "{field}" in cmdlog contents with value
+
+   Commands During Replay
+        ^U                   pause/resume replay
+        ^N                   execute next row in replaying sheet
+        ^K                   cancel current replay
+
+   GLOBAL COMMANDS
+     All keystrokes are case sensitive. The ^ prefix is shorthand for Ctrl.
+
+   Keystrokes to start off with
+      ^Q              abort program immediately
+      ^C              cancel user input or abort all async threads on current
+                      sheet
+     g^C              abort all secondary threads
+       q              quit current sheet or menu
+       Q              quit current sheet and free associated memory
+      gq              quit all sheets (clean exit)
+
+      ^H              activate help menu (Enter/left-mouse to expand submenu
+                      or execute command)
+     g^H              view this man page
+     z^H              view sheet of command longnames and keybindings for cur‐
+                      rent sheet
+     Space longname   execute command by its longname
+
+       U              undo the most recent modification (requires enabled
+                      options.undo)
+       R              redo the most recent undo (requires enabled
+                      options.undo)
+
+   Cursor Movement
+     Arrow PgUp       go as expected
+      h   j   k   l   go left/down/up/right
+     gh  gj  gk  gl   go all the way to the left/bottom/top/right of sheet
+          G  gg       go all the way to the bottom/top of sheet
+     Ic. End  Home    go all the way to the bottom/top of sheet
+     ^B  ^F           scroll one page back/forward
+     ^Left ^Right     scroll one page left/right
+     zz               scroll current row to center of screen
+
+     ^^ (Ctrl+^)      jump to previous sheet (swaps with current sheet)
+
+      /   ? regex     search for regex forward/backward in current column
+     g/  g? regex     search for regex forward/backward over all visible
+                      columns
+     z/  z? expr      search by Python expr forward/backward in current column
+                      (with column names as variables)
+      n   N           go to next/previous match from last regex search
+
+      <   >           go up/down current column to next value
+     z<  z>           go up/down current column to next null value
+      {   }           go up/down current column to next selected row
+
+      c regex         go to next column with name matching regex
+      r regex         go to next row with key matching regex
+     zc  zr number    go to column/row number (0-based)
+
+      H   J   K   L   slide current row/column left/down/up/right
+     gH  gJ  gK  gL   slide current row/column all the way to the left/bot‐
+                      tom/top/right of sheet
+     zH  zJ  zK  zK number
+                      slide current row/column number positions to the
+                      left/down/up/right
+
+     zh  zj  zk  zl   scroll one left/down/up/right
+
+   Column Manipulation
+       _ (underbar)   toggle width of current column between full and default
+                      width
+      g_              toggle widths of all visible columns between full and
+                      default width
+      z_ number       adjust width of current column to number
+     gz_ number       adjust widths of all visible columns to Ar number
+
+      - (hyphen)      hide current column
+     z-               reduce width of current column by half
+     gv               unhide all columns
+
+     ! z!             toggle/unset current column as a key column
+     ~  #  %  $  @  z#
+                      set type of current column to str/int/float/cur‐
+                      rency/date/len
+       ^              rename current column
+      g^              rename all unnamed visible columns to contents of se‐
+                      lected rows (or current row)
+      z^              rename current column to combined contents of current
+                      cell in selected rows (or current row)
+     gz^              rename all visible columns to combined contents of cur‐
+                      rent column for selected rows (or current row)
+
+       = expr         create new column from Python expr, with column names,
+                      and attributes, as variables
+      g= expr         set current column for selected rows to result of Python
+                      expr
+     gz= expr         set current column for selected rows to the items in
+                      result of Python sequence expr
+      z= expr         evaluate Python expression on current row and set
+                      current cell with result of Python expr
+
+       i              add column with incremental values
+      gi              set current column for selected rows to incremental
+                      values
+      zi step         add column with values at increment step
+     gzi step         set current column for selected rows at increment step
+
+      ' (tick)        add a frozen copy of current column with all cells eval‐
+                      uated
+     g'               open a frozen copy of current sheet with all visible
+                      columns evaluated
+     z'  gz'          add/reset cache for current/all visible column(s)
+
+      : regex         add new columns from regex split; number of columns
+                      determined by example row at cursor
+      ; regex         add new columns from capture groups of regex (also
+                      requires example row)
+     z; expr          create new column from bash expr, with $columnNames as
+                      variables
+      * regex/subst   add column derived from current column, replacing regex
+                      with subst (may include \1 backrefs)
+     g*  gz* regex/subst
+                      modify selected rows in current/all visible column(s),
+                      replacing regex with subst (may include \1 backrefs)
+
+      (   g(          expand current/all visible column(s) of lists (e.g. [3])
+                      or dicts (e.g. {3}) one level
+     z(  gz( depth    expand current/all visible column(s) of lists (e.g. [3])
+                      or dicts (e.g. {3}) to given depth (0= fully)
+      )               unexpand current column; restore original column and re‐
+                      move other columns at this level
+     zM               row-wise expand current column of lists (e.g. [3]) or
+                      dicts (e.g. {3}) within that column
+
+   Row Selection
+       s   t   u      select/toggle/unselect current row
+      gs  gt  gu      select/toggle/unselect all rows
+      zs  zt  zu      select/toggle/unselect all rows from top to cursor
+     gzs gzt gzu      select/toggle/unselect all rows from cursor to bottom
+      |   \ regex     select/unselect rows matching regex in current column
+     g|  g\ regex     select/unselect rows matching regex in any visible
+                      column
+     z|  z\ expr      select/unselect rows matching Python expr in any visible
+                      column
+      , (comma)       select rows matching display value of current cell in
+                      current column
+     g,               select rows matching display value of current row in all
+                      visible columns
+     z, gz,           select rows matching typed value of current cell/row in
+                      current column/all visible columns
+
+   Row Sorting/Filtering
+       [    ]         sort ascending/descending by current column; replace any
+                      existing sort criteria
+      g[   g]         sort ascending/descending by all key columns; replace
+                      any existing sort criteria
+      z[   z]         sort ascending/descending by current column; add to ex‐
+                      isting sort criteria
+     gz[  gz]         sort ascending/descending by all key columns; add to ex‐
+                      isting sort criteria
+      "               open duplicate sheet with only selected rows
+     g"               open duplicate sheet with all rows
+     gz"              open duplicate sheet with deepcopy of selected rows
+
+     The rows in these duplicated sheets (except deepcopy) are references to
+     rows on the original source sheets, and so edits to the filtered rows
+     will naturally be reflected in the original rows.  Use g' to freeze sheet
+     contents in a deliberate copy.
+
+   Editing Rows and Cells
+       a   za         append blank row/column; appended columns cannot be
+                      copied to clipboard
+      ga  gza number  append number blank rows/columns
+       d   gd         delete current/selected row(s)
+       y   gy         yank (copy) current/all selected row(s) to clipboard in
+                      Memory Sheet
+       x   gx         cut (copy and delete) current/all selected row(s) to
+                      clipboard in Memory Sheet
+      zy  gzy         yank (copy) contents of current column for
+                      current/selected row(s) to clipboard in Memory Sheet
+      zd  gzd         set contents of current column for current/selected
+                      row(s) to options.null_value
+      zx  gzx         cut (copy and delete) contents of current column for
+                      current/selected row(s) to clipboard in Memory Sheet
+       p    P         paste clipboard rows after/before current row
+      zp  gzp         set cells of current column for current/selected row(s)
+                      to last clipboard value
+      zP  gzP         paste to cells of current column for current/selected
+                      row(s) using the system clipboard
+       Y   gY         yank (copy) current/all selected row(s) to system
+                      clipboard (using options.clipboard_copy_cmd)
+      zY  gzY         yank (copy) contents of current column for
+                      current/selected row(s) to system clipboard (using
+                      options.clipboard_copy_cmd)
+       f              fill null cells in current column with contents of non-
+                      null cells up the current column
+       e text         edit contents of current cell
+      ge text         set contents of current column for selected rows to text
+
+     Commands While Editing Input
+        Enter  ^C        accept/abort input
+        ^O  g^O          open external $EDITOR to edit contents of current/se‐
+                         lected rows in current column
+        ^R               reload initial value
+        ^A   ^E          go to beginning/end of line
+        ^B   ^F          go back/forward one character
+        ^←   ^→ (arrow)  go back/forward one word
+        ^H   ^D          delete previous/current character
+        ^T               transpose previous and current characters
+        ^U   ^K          clear from cursor to beginning/end of line
+        ^Y               paste from cell clipboard
+        Backspace  Del   delete previous/current character
+        Insert           toggle insert mode
+        Up  Down         set contents to previous/next in history
+        Tab  Shift+Tab   autocomplete input (when available)
+        Shift+Arrow      move cursor in direction of Arrow and re-enter edit
+                         mode
+
+   Data Toolkit
+      o input         open input in VisiData
+     zo               open file or url from path in current cell
+     ^S g^S filename  save current/all sheet(s) to filename in format
+                      determined by extension (default .tsv)
+                      Note: if the format does not support multisave, or the
+                      filename ends in a /, a directory will be created.
+     z^S filename     save current column only to filename in format
+                      determined by extension (default .tsv)
+     ^D filename.vdj  save CommandLog to filename.vdj file
+     A                open new blank sheet with one column
+     T                open new sheet that has rows and columns of current
+                      sheet transposed
+
+      + aggregator    add aggregator to current column (see Frequency Table)
+     z+ aggregator    display result of aggregator over values in selected
+                      rows for current column; store result in Memory Sheet
+      &               append top two sheets in Sheets Stack
+     g&               append all sheets in Sheets Stack
+
+      w nBefore nAfter
+                      add column where each row contains a list of that row,
+                      nBefore rows, and nAfter rows
+
+   Data Visualization
+      . (dot)       plot current numeric column vs key columns. The numeric
+                    key column is used for the x-axis; categorical key column
+                    values determine color.
+     g.             plot a graph of all visible numeric columns vs key
+                    columns.
+
+     If rows on the current sheet represent plottable coordinates (as in .shp
+     or vector .mbtiles sources),  . plots the current row, and g. plots all
+     selected rows (or all rows if none selected).
+
+     Canvas-specific Commands
+         +   -              increase/decrease zoom level, centered on cursor
+         _ (underbar)       zoom to fit full extent
+        z_ (underbar)       set aspect ratio
+         x xmin xmax        set xmin/xmax on graph
+         y ymin ymax        set ymin/ymax on graph
+         s   t   u          select/toggle/unselect rows on source sheet con‐
+                            tained within canvas cursor
+        gs  gt  gu          select/toggle/unselect rows on source sheet visi‐
+                            ble on screen
+         d                  delete rows on source sheet contained within can‐
+                            vas cursor
+        gd                  delete rows on source sheet visible on screen
+         Enter              open sheet of source rows contained within canvas
+                            cursor
+        gEnter              open sheet of source rows visible on screen
+         1 - 9              toggle display of layers
+        ^L                  redraw all pixels on canvas
+         v                  toggle show_graph_labels option
+        mouse scrollwheel   zoom in/out of canvas
+        left click-drag     set canvas cursor
+        right click-drag    scroll canvas
+
+   Split Screen
+      Z             split screen in half, so that second sheet on the stack is
+                    visible in a second pane
+     zZ             split screen, and queries for height of second pane
+
+     Split Window specific Commands
+        gZ                  close an already split screen; the current pane
+                            goes full screen
+         Z                  push second sheet on current pane's stack to the
+                            top of the other pane's stack
+         Tab                jump to other pane
+        gTab                swap panes
+        g Ctrl+^            cycle through sheets
+
+   Other Commands
+     Q                quit current sheet and remove it from the CommandLog
+     v                toggle sheet-specific visibility (multi-line rows on
+                      Sheet, legends/axes on Graph)
+
+      ^E  g^E         view traceback for most recent error(s)
+     z^E              view traceback for error in current cell
+
+      ^L              refresh screen
+      ^R              reload current sheet
+      ^Z              suspend VisiData process
+      ^G              show cursor position and bounds of current sheet on sta‐
+                      tus line
+      ^V              show version and copyright information on status line
+      ^P              open Status History
+     m keystroke      begin recording a macro on the first press; on the
+                      second press, prompt for a keystroke and complete the
+                      recording. The macro then runs every time the given
+                      keystroke is used, overriding any existing keybinding.
+                      Macros run on the current row, column, and sheet.
+     gm               open an index of all existing macros. Can be directly
+                      viewed with Enter, and then modified with ^S.
+
+      ^Y  z^Y  g^Y    open current row/cell/sheet as Python object
+      ^X expr         evaluate Python expr and open result as Python object
+     z^X expr         evaluate Python expr, in context of current row, and
+                      open result as Python object
+     g^X module       import Python module in the global scope
+
+   Internal Sheets List
+      .  Directory Sheet             browse properties of files in a directory
+      .  Plugins Sheet               browse, install, and (de)activate plugins
+      .  Memory Sheet (Alt+Shift+M)        browse saved values, including
+         clipboard
+
+     Metasheets
+      .  Columns Sheet (Shift+C)     edit column properties
+      .  Sheets Sheet (Shift+S)      jump between sheets or join them together
+      .  Options Sheet (Shift+O)     edit configuration options
+      .  Commandlog (Shift+D)        modify and save commands for replay
+      .  Error Sheet (Ctrl+E)            view last error
+      .  Status History (Ctrl+P)         view history of status messages
+      .  Threads Sheet (Ctrl+T)          view, cancel, and profile
+         asynchronous threads
+
+     Derived Sheets
+      .  Frequency Table (Shift+F)   group rows by column value, with
+         aggregations of other columns
+      .  Describe Sheet (Shift+I)    view summary statistics for each column
+      .  Pivot Table (Shift+W)       group rows by key and summarize current
+         column
+      .  Melted Sheet (Shift+M)      unpivot non-key columns into
+         variable/value columns
+      .  Transposed Sheet (Shift+T)   open new sheet with rows and columns
+         transposed
+
+   INTERNAL SHEETS
+   Directory Sheet
+     (global commands)
+        Space open-dir-current
+                         open the Directory Sheet for the current directory
+     (sheet-specific commands)
+        Enter  gEnter    open current/selected file(s) as new sheet(s)
+         ^O  g^O         open current/selected file(s) in external $EDITOR
+         ^R  z^R  gz^R   reload information for all/current/selected file(s)
+          d   gd         delete current/selected file(s) from filesystem, upon
+                         commit
+          y   gy directory
+                         copy current/selected file(s) to given directory,
+                         upon commit
+          e   ge name    rename current/selected file(s) to name
+          ` (backtick)   open parent directory
+        z^S              commit changes to file system
+
+   Plugins Sheet
+     Browse through a list of available plugins. VisiData needs to be
+     restarted before plugin activation takes effect. Installation may require
+     internet access.
+     (global commands)
+        Space open-plugins
+                         open the Plugins Sheet
+     (sheet-specific commands)
+        a                install and activate current plugin
+        d                deactivate current plugin
+
+   Memory Sheet
+     Browse through a list of stored values, referenceable in expressions
+     by their names.
+     (global commands)
+        Alt+Shift+M      open the Memory Sheet
+        Alt+M name       store value in current cell in Memory Sheet under
+                         name
+     (sheet-specific commands)
+        e                edit either the value or the name of a reference
+
+   METASHEETS
+   Columns Sheet (Shift+C)
+     Properties of columns on the source sheet can be changed with standard
+     editing commands (e ge g= Del) on the Columns Sheet. Multiple aggregators
+     can be set by listing them (separated by spaces) in the aggregators
+     column. The 'g' commands affect the selected rows, which are the literal
+     columns on the source sheet.
+     (global commands)
+        gC               open Columns Sheet with all visible columns from all
+                         sheets
+     (sheet-specific commands)
+         &               add column from appending selected source columns
+        g! gz!           toggle/unset selected columns as key columns on
+                         source sheet
+        g+ aggregator    add aggregator to selected source columns
+        g- (hyphen)      hide selected columns on source sheet
+        g~ g# g% g$ g@ gz# z%
+                         set type of selected columns on source sheet to
+                         str/int/float/currency/date/len/floatsi
+         Enter           open a Frequency Table sheet grouped by column
+                         referenced in current row
+
+   Sheets Sheet (Shift+S)
+     open Sheets Stack, which contains only the active sheets on the current
+     stack
+     (global commands)
+        gS               open Sheets Sheet, which contains all sheets from
+                         current session, active and inactive
+        Alt number       jump to sheet number
+     (sheet-specific commands)
+         Enter           jump to sheet referenced in current row
+        gEnter           push selected sheets to top of sheet stack
+         a               add row to reference a new blank sheet
+        gC  gI           open Columns Sheet/Describe Sheet with all visible
+                         columns from selected sheets
+        g^R              reload all selected sheets
+        z^C  gz^C        abort async threads for current/selected sheets(s)
+        g^S              save selected or all sheets
+         & jointype      merge selected sheets with visible columns from all,
+                         keeping rows according to jointype:
+                         .  inner  keep only rows which match keys on all
+                            sheets
+                         .  outer  keep all rows from first selected sheet
+                         .  full   keep all rows from all sheets (union)
+                         .  diff   keep only rows NOT in all sheets
+                         .  append combine all rows from all sheets
+                         .  concat similar to 'append' but keep first sheet
+                            type and columns
+                         .  extend copy first selected sheet, keeping all rows
+                            and sheet type, and extend with columns from other
+                            sheets
+                         .  merge  mostly keep all rows from first selected
+                            sheet, except prioritise cells with non-null/non-
+                            error values
+
+   Options Sheet (Shift+O)
+     (global commands)
+        Shift+O          edit global options (apply to all sheets)
+        zO               edit sheet options (apply to current sheet only)
+        gO               open options.config as TextSheet
+     (sheet-specific commands)
+        Enter  e         edit option at current row
+        d                remove option override for this context
+        ^S               save option configuration to foo.visidatarc
+
+   CommandLog (Shift+D)
+     (global commands)
+        D                open current sheet's CommandLog with all other loose
+                         ends removed; includes commands from parent sheets
+        gD               open global CommandLog for all commands executed in
+                         the current session
+        zD               open current sheet's CommandLog with the parent
+                         sheets' commands removed
+     (sheet-specific commands)
+          x              replay command in current row
+         gx              replay contents of entire CommandLog
+         ^C              abort replay
+
+   Threads Sheet (Ctrl+T)
+     (global commands)
+        ^T               open global Threads Sheet for all asynchronous
+                         threads running
+        z^T              open current sheet's Threads Sheet
+     (sheet-specific commands)
+         ^C              abort thread at current row
+        g^C              abort all threads on current Threads Sheet
+
+   DERIVED SHEETS
+   Frequency Table (Shift+F)
+     A Frequency Table groups rows by one or more columns, and includes
+     summary columns for those with aggregators.
+     (global commands)
+        gF               open Frequency Table, grouped by all key columns on
+                         source sheet
+        zF               open one-line summary for all rows and selected rows
+     (sheet-specific commands)
+         s   t   u       select/toggle/unselect these entries in source sheet
+         Enter  gEnter   open copy of source sheet with rows that are grouped
+                         in current cell / selected rows
+
+   Describe Sheet (Shift+I)
+     A Describe Sheet contains descriptive statistics for all visible columns.
+     (global commands)
+        gI               open Describe Sheet for all visible columns on all
+                         sheets
+     (sheet-specific commands)
+        zs  zu           select/unselect rows on source sheet that are being
+                         described in current cell
+         !               toggle/unset current column as a key column on source
+                         sheet
+         Enter           open a Frequency Table sheet grouped on column
+                         referenced in current row
+        zEnter           open copy of source sheet with rows described in cur‐
+                         rent cell
+
+   Pivot Table (Shift+W)
+     Set key column(s) and aggregators on column(s) before pressing Shift+W on
+     the column to pivot.
+     (sheet-specific commands)
+         Enter           open sheet of source rows aggregated in current pivot
+                         row
+        zEnter           open sheet of source rows aggregated in current pivot
+                         cell
+
+   Melted Sheet (Shift+M)
+     Open Melted Sheet (unpivot), with key columns retained and all non-key
+     columns reduced to Variable-Value rows.
+     (global commands)
+        gM regex         open Melted Sheet (unpivot), with key columns
+                         retained and regex capture groups determining how the
+                         non-key columns will be reduced to Variable-Value
+                         rows.
+
+   Python Object Sheet (^X ^Y g^Y z^Y)
+     (sheet-specific commands)
+         Enter           dive further into Python object
+         v               toggle show/hide for methods and hidden properties
+        gv  zv           show/hide methods and hidden properties
+
+COMMANDLINE OPTIONS
+     Add -n/--nonglobal to make subsequent CLI options sheet-specific
+     (applying only to paths specified directly on the CLI). By default, CLI
+     options apply to all sheets.
+
+     Options can also be set via the Options Sheet or a .visidatarc (see
+     FILES).
+
+     -P=longname                  preplay longname before replay or regular
+                                  launch; limited to Base Sheet bound commands
+     +toplevel:subsheet:col:row   launch vd with subsheet of toplevel at
+                                  top-of-stack, and cursor at col and row; all
+                                  arguments are optional
+
+     -f, --filetype=filetype      tsv                set loader to use for
+                                  filetype instead of file extension
+     -d, --delimiter=delimiter    \t                 field delimiter to use
+                                  for tsv/usv filetype
+     -y, --confirm-overwrite=F    True               overwrite existing files
+                                  without confirmation
+     -N, --nothing=T              False               disable loading
+                                  .visidatarc and plugin addons
+     --visidata-dir=str           ~/.visidata/       directory to load and
+                                                     store additional files
+     --mouse-interval=int         1                  max time between
+                                                     press/release for click
+                                                     (ms)
+     --null-value=NoneType        None               a value to be counted as
+                                                     null
+     --undo=bool                  True               enable undo/redo
+     --col-cache-size=int         0                  max number of cache en‐
+                                                     tries in each cached col‐
+                                                     umn
+     --clean-names                False              clean column/sheet names
+                                                     to be valid Python iden‐
+                                                     tifiers
+     --default-width=int          20                 default column width
+     --default-height=int         4                  default column height
+     --textwrap-cells=bool        True               wordwrap text for multi‐
+                                                     line rows
+     --quitguard                  False              confirm before quitting
+                                                     modified sheet
+     --debug                      False              exit on error and display
+                                                     stacktrace
+     --skip=int                   0                  skip N rows before header
+     --header=int                 1                  parse first N rows as
+                                                     column names
+     --load-lazy                  False              load subsheets always
+                                                     (False) or lazily (True)
+     --force-256-colors           False              use 256 colors even if
+                                                     curses reports fewer
+     --note-pending=str           ⌛                 note to display for pend‐
+                                                     ing cells
+     --note-format-exc=str        ?                  cell note for an excep‐
+                                                     tion during formatting
+     --note-getter-exc=str        !                  cell note for an excep‐
+                                                     tion during computation
+     --note-type-exc=str          !                  cell note for an excep‐
+                                                     tion during type conver‐
+                                                     sion
+     --scroll-incr=int            -3                 amount to scroll with
+                                                     scrollwheel
+     --name-joiner=str            _                  string to join sheet or
+                                                     column names
+     --value-joiner=str                              string to join display
+                                                     values
+     --wrap                       False              wrap text to fit window
+                                                     width on TextSheet
+     --save-filetype=str          tsv                specify default file type
+                                                     to save as
+     --profile                    False              enable profiling on
+                                                     threads
+     --min-memory-mb=int          0                  minimum memory to con‐
+                                                     tinue loading and async
+                                                     processing
+     --encoding=str               utf-8              encoding passed to
+                                                     codecs.open
+     --encoding-errors=str        surrogateescape    encoding_errors passed to
+                                                     codecs.open
+     --input-history=str                             basename of file to store
+                                                     persistent input history
+     --bulk-select-clear          False              clear selected rows be‐
+                                                     fore new bulk selections
+     --some-selected-rows         False              if no rows are selected:
+                                                     if True, someSelectedRows
+                                                     returns all rows; if
+                                                     False, it fails
+     --delimiter=str                                 field delimiter to use
+                                                     for tsv/usv filetype
+     --row-delimiter=str          \n                 row delimiter to use
+                                                     for tsv/usv filetype
+     --tsv-safe-newline=str                          replacement for newline
+                                                     character when saving to
+                                                     tsv
+     --tsv-safe-tab=str                              replacement for tab char‐
+                                                     acter when saving to tsv
+     --visibility=int             0                  visibility level (0=low,
+                                                     1=high)
+     --default-sample-size=int    100                number of rows to sample
+                                                     for regex.split (0=all)
+     --fmt-expand-dict=str        %s.%s              format str to use for
+                                                     names of columns expanded
+                                                     from dict (colname, key)
+     --fmt-expand-list=str        %s[%s]             format str to use for
+                                                     names of columns expanded
+                                                     from list (colname, in‐
+                                                     dex)
+     --json-indent=NoneType       None               indent to use when saving
+                                                     json
+     --json-sort-keys             False              sort object keys when
+                                                     saving to json
+     --default-colname=str                           column name to use for
+                                                     non-dict rows
+     --filetype=str                                  specify file type
+     --replay-wait=float          0.0                time to wait between re‐
+                                                     played commands, in sec‐
+                                                     onds
+     --replay-movement            False              insert movements during
+                                                     replay
+     --rowkey-prefix=str          キ                 string prefix for rowkey
+                                                     in the cmdlog
+     --cmdlog-histfile=str                           file to autorecord each
+                                                     cmdlog action to
+     --confirm-overwrite=bool     True               whether to prompt for
+                                                     overwrite confirmation on
+                                                     save
+     --safe-error=str             #ERR               error string to use while
+                                                     saving
+     --clipboard-copy-cmd=str     xclip -selection clipboard -filter
+                                                     command to copy stdin to
+                                                     system clipboard
+     --clipboard-paste-cmd=str    xclip -selection clipboard -o
+                                                     command to send contents
+                                                     of system clipboard to
+                                                     stdout
+     --fancy-chooser              False              a nicer selection inter‐
+                                                     face for aggregators and
+                                                     jointype
+     --describe-aggrs=str         mean stdev         numeric aggregators to
+                                                     calculate on Describe
+                                                     sheet
+     --histogram-bins=int         0                  number of bins for his‐
+                                                     togram of numeric columns
+     --numeric-binning            False              bin numeric columns into
+                                                     ranges
+     --regex-flags=str            I                  flags to pass to re.com‐
+                                                     pile() [AILMSUX]
+     --regex-maxsplit=int         0                  maxsplit to pass to
+                                                     regex.split
+     --show-graph-labels=bool     True               show axes and legend on
+                                                     graph
+     --plot-colors=str                               list of distinct colors
+                                                     to use for plotting dis‐
+                                                     tinct objects
+     --zoom-incr=float            2.0                amount to multiply cur‐
+                                                     rent zoomlevel when zoom‐
+                                                     ing
+     --motd-url=str                                  source of randomized
+                                                     startup messages
+     --dir-recurse                False              walk source path recur‐
+                                                     sively on DirSheet
+     --dir-hidden                 False              load hidden files on
+                                                     DirSheet
+     --config=Path                ~/.visidatarc
+                                                     config file to exec in
+                                                     Python
+     --play=str                                      file.vd to replay
+     --batch                      False              replay in batch mode
+                                                     (with no interface and
+                                                     all status sent to std‐
+                                                     out)
+     --output=NoneType            None               save the final visible
+                                                     sheet to output at the
+                                                     end of replay
+     --preplay=str                                   longnames to preplay be‐
+                                                     fore replay
+     --imports=str                plugins            imports to preload before
+                                                     .visidatarc (command-line
+                                                     only)
+     --nothing                    False              no config, no plugins,
+                                                     nothing extra
+     --unfurl-empty               False              if unfurl includes rows
+                                                     for empty containers
+     --incr-base=float            1.0                start value for column
+                                                     increments
+     --csv-dialect=str            excel              dialect passed to
+                                                     csv.reader
+     --csv-delimiter=str          ,                  delimiter passed to
+                                                     csv.reader
+     --csv-quotechar=str          "                  quotechar passed to
+                                                     csv.reader
+     --csv-skipinitialspace=bool  True               skipinitialspace passed
+                                                     to csv.reader
+     --csv-escapechar=NoneType    None               escapechar passed to
+                                                     csv.reader
+     --csv-lineterminator=str     \r\n               lineterminator passed
+                                                     to csv.writer
+     --safety-first               False              sanitize input/output to
+                                                     handle edge cases, with a
+                                                     performance cost
+     --xlsx-meta-columns          False              include columns for cell
+                                                     objects, font colors, and
+                                                     fill colors
+     --sqlite-onconnect=str                          sqlite statement to exe‐
+                                                     cute after opening a con‐
+                                                     nection
+     --fixed-rows=int             1000               number of rows to check
+                                                     for fixed width columns
+     --fixed-maxcols=int          0                  max number of fixed-width
+                                                     columns to create (0 is
+                                                     no max)
+     --postgres-schema=str        public             The desired schema for
+                                                     the Postgres database
+     --http-max-next=int          0                  max next.url pages to
+                                                     follow in http response
+     --http-req-headers=dict      {}                 http headers to send to
+                                                     requests
+     --html-title=str             <h2>{sheet.name}</h2>
+                                                     table header when saving
+                                                     to html
+     --pcap-internet=str          n                  (y/s/n) if save_dot in‐
+                                                     cludes all internet hosts
+                                                     separately (y), combined
+                                                     (s), or does not include
+                                                     the internet (n)
+     --xml-parser-huge-tree=bool  True               allow very deep trees and
+                                                     very long text content
+     --graphviz-edge-labels=bool  True               whether to include edge
+                                                     labels on graphviz dia‐
+                                                     grams
+     --npy-allow-pickle           False              numpy allow unpickling
+                                                     objects (unsafe)
+     --pdf-tables                 False              parse PDF for tables in‐
+                                                     stead of pages of text
+     --plugins-url=str            https://visidata.org/plugins/plugins.jsonl
+                                                     source of plugins sheet
+     --plugins-autoload=bool      True               do not autoload plugins
+                                                     if False
+
+   DISPLAY OPTIONS
+     Display options can only be set via the Options Sheet or a .visidatarc
+     (see FILES).
+
+     disp_splitwin_pct   0                   height of second sheet on screen
+     color_sidebar       black on 114 blue   color of sidebar
+     disp_float_fmt      {:.02f}             default fmtstr to format for
+                                             float values
+     disp_int_fmt        {:d}                default fmtstr to format for int
+                                             values
+     disp_note_none      ⌀                   visible contents of a cell whose
+                                             value is None
+     disp_truncator      …                   indicator that the contents are
+                                             only partially visible
+     disp_oddspace       ·                   displayable character for odd
+                                             whitespace
+     disp_more_left      <                   header note indicating more col‐
+                                             umns to the left
+     disp_more_right     >                   header note indicating more col‐
+                                             umns to the right
+     disp_error_val                          displayed contents for computa‐
+                                             tion exception
+     disp_ambig_width    1                   width to use for unicode chars
+                                             marked ambiguous
+     disp_pending                            string to display in pending
+                                             cells
+     color_note_pending  bold magenta        color of note in pending cells
+     color_note_type     226 yellow          color of cell note for non-str
+                                             types in anytype columns
+     color_note_row      220 yellow          color of row note on left edge
+     disp_column_sep     │                   separator between columns
+     disp_keycol_sep     ║                   separator between key columns and
+                                             rest of columns
+     disp_rowtop_sep     │
+     disp_rowmid_sep     ⁝
+     disp_rowbot_sep     ⁝
+     disp_rowend_sep     ║
+     disp_keytop_sep     ║
+     disp_keymid_sep     ║
+     disp_keybot_sep     ║
+     disp_endtop_sep     ║
+     disp_endmid_sep     ║
+     disp_endbot_sep     ║
+     disp_selected_note  •
+     disp_sort_asc       ↑↟⇞⇡⇧⇑              characters for ascending sort
+     disp_sort_desc      ↓↡⇟⇣⇩⇓              characters for descending sort
+     color_default       white on black      the default fg and bg colors
+     color_default_hdr   bold                color of the column headers
+     color_bottom_hdr    underline           color of the bottom header row
+     color_current_row   reverse             color of the cursor row
+     color_current_col   bold                color of the cursor column
+     color_current_hdr   bold reverse        color of the header for the cur‐
+                                             sor column
+     color_column_sep    246 blue            color of column separators
+     color_key_col       81 cyan             color of key columns
+     color_hidden_col    8                   color of hidden columns on
+                                             metasheets
+     color_selected_row  215 yellow          color of selected rows
+     disp_rstatus_fmt     {sheet.longname} {sheet.nRows:9d} {sheet.rowtype}
+                                             {sheet.modifiedStatus}
+                                             {sheet.options.disp_selected_note}{sheet.nSelectedRows}
+                                             right-side status format string
+     disp_status_fmt     {sheet.shortcut}› {sheet.name}|
+                                             status line prefix
+     disp_lstatus_max    0                   maximum length of left status
+                                             line
+     disp_status_sep      │                  separator between statuses
+     color_keystrokes    bold 233 black on 110 cyan
+                                             color of input keystrokes on sta‐
+                                             tus line
+     color_status        bold black on 110 cyan
+                                             status line color
+     color_error         red                 error message color
+     color_warning       yellow              warning message color
+     color_top_status    underline           top window status bar color
+     color_active_status black on 110 cyan    active window status bar color
+     color_inactive_status 8 on black        inactive window status bar color
+     color_working       green               color of system running smoothly
+     color_edit_cell     white               cell color to use when editing
+                                             cell
+     disp_edit_fill      _                   edit field fill character
+     disp_unprintable    ·                   substitute character for unprint‐
+                                             ables
+     disp_currency_fmt   %.02f               default fmtstr to format for cur‐
+                                             rency values
+     disp_date_fmt       %Y-%m-%d            default fmtstr to strftime for
+                                             date values
+     disp_replay_play    ▶                   status indicator for active re‐
+                                             play
+     disp_replay_pause   ‖                   status indicator for paused re‐
+                                             play
+     color_status_replay green               color of replay status indicator
+     disp_formatter      generic             formatter to use for display and
+                                             saving
+     disp_menu           True                show menu on top line when not
+                                             active
+     disp_menu_keys      True                show keystrokes inline in sub‐
+                                             menus
+     color_menu          black on 110 cyan   color of menu items in general
+     color_menu_active   223 yellow on black
+                                             color of active menu items
+     color_menu_spec     black on 34 green   color of sheet-specific menu
+                                             items
+     color_menu_help     black italic on 110 cyan
+                                             color of helpbox
+     disp_menu_boxchars  ││──┌┐└┘├┤          box characters to use for menus
+     disp_menu_more      »                   command submenu indicator
+     disp_menu_push      ⎘                   indicator if command pushes sheet
+                                             onto sheet stack
+     disp_menu_input     …                   indicator if input required for
+                                             command
+     disp_menu_fmt       Ctrl+H for help menu
+                                             right-side menu format string
+     disp_histogram      ■                   histogram element character
+     disp_histolen       50                  width of histogram column
+     disp_canvas_charset
+                                             ⠀⠁⠂⠃⠄⠅⠆⠇⠈⠉⠊⠋⠌⠍⠎⠏⠐⠑⠒⠓⠔⠕⠖⠗⠘⠙⠚⠛⠜⠝⠞⠟⠠⠡⠢⠣⠤⠥⠦⠧⠨⠩⠪⠫⠬⠭⠮⠯⠰⠱⠲⠳⠴⠵⠶⠷⠸⠹⠺⠻⠼⠽⠾⠿⡀⡁⡂⡃⡄⡅⡆⡇⡈⡉⡊⡋⡌⡍⡎⡏⡐⡑⡒⡓⡔⡕⡖⡗⡘⡙⡚⡛⡜⡝⡞⡟⡠⡡⡢⡣⡤⡥⡦⡧⡨⡩⡪⡫⡬⡭⡮⡯⡰⡱⡲⡳⡴⡵⡶⡷⡸⡹⡺⡻⡼⡽⡾⡿⢀⢁⢂⢃⢄⢅⢆⢇⢈⢉⢊⢋⢌⢍⢎⢏⢐⢑⢒⢓⢔⢕⢖⢗⢘⢙⢚⢛⢜⢝⢞⢟⢠⢡⢢⢣⢤⢥⢦⢧⢨⢩⢪⢫⢬⢭⢮⢯⢰⢱⢲⢳⢴⢵⢶⢷⢸⢹⢺⢻⢼⢽⢾⢿⣀⣁⣂⣃⣄⣅⣆⣇⣈⣉⣊⣋⣌⣍⣎⣏⣐⣑⣒⣓⣔⣕⣖⣗⣘⣙⣚⣛⣜⣝⣞⣟⣠⣡⣢⣣⣤⣥⣦⣧⣨⣩⣪⣫⣬⣭⣮⣯⣰⣱⣲⣳⣴⣵⣶⣷⣸⣹⣺⣻⣼⣽⣾⣿
+                                             charset to render 2x4 blocks on
+                                             canvas
+     disp_pixel_random   False               randomly choose attr from set of
+                                             pixels instead of most common
+     color_graph_hidden  238 blue            color of legend for hidden attri‐
+                                             bute
+     color_graph_selected bold               color of selected graph points
+     color_graph_axis    bold                color for graph axis labels
+     color_add_pending   green               color for rows pending add
+     color_change_pending reverse yellow     color for cells pending modifica‐
+                                             tion
+     color_delete_pending red                color for rows pending delete
+     color_xword_active  green               color of active clue
+
+EXAMPLES
+           vd
+     launch DirSheet for current directory
+
+           vd foo.tsv
+     open the file foo.tsv in the current directory
+
+           vd -f ddw
+     open blank sheet of type ddw
+
+           vd new.tsv
+     open new blank tsv sheet named new
+
+           vd -f sqlite bar.db
+     open the file bar.db as a sqlite database
+
+           vd foo.tsv -n -f sqlite bar.db
+     open foo.tsv as tsv and bar.db as a sqlite database
+
+           vd -f sqlite foo.tsv bar.db
+     open both foo.tsv and bar.db as a sqlite database
+
+           vd -b countries.fixed -o countries.tsv
+     convert countries.fixed (in fixed width format) to countries.tsv (in tsv
+     format)
+
+           vd postgres://username:password@hostname:port/database
+     open a connection to the given postgres database
+
+           vd --play tests/pivot.vd --replay-wait 1 --output tests/pivot.tsv
+     replay tests/pivot.vd, waiting 1 second between commands, and output the
+     final sheet to tests/pivot.tsv
+
+           ls -l | vd -f fixed --skip 1 --header 0
+     parse the output of ls -l into usable data
+
+           ls | vd | lpr
+     interactively select a list of filenames to send to the printer
+
+           vd newfile.tsv
+     open a blank sheet named newfile if file does not exist
+
+           vd sample.xlsx +:sheet1:2:3
+     launch with sheet1 at top-of-stack, and cursor at column 2 and row 3
+
+           vd -P open-plugins
+     preplay longname open-plugins before starting the session
+
+FILES
+     At the start of every session, VisiData looks for $HOME/.visidatarc, and
+     calls Python exec() on its contents if it exists. For example:
+
+        options.min_memory_mb=100  # stop processing without 100MB free
+
+        bindkey('0', 'go-leftmost')   # alias '0' to go to first column, like vim
+
+        def median(values):
+            L = sorted(values)
+            return L[len(L)//2]
+
+        vd.aggregator('median', median)
+
+     Functions defined in .visidatarc are available in python expressions
+     (e.g. in derived columns).
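+
+     For example, a helper defined there can back a derived column (a
+     sketch; the temp_c column name is hypothetical):
+
+        def fahrenheit(celsius):
+            return celsius * 9/5 + 32
+
+     after which an expression like fahrenheit(temp_c) can be used anywhere
+     a python expression is accepted, such as when adding a new column.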
+
+SUPPORTED SOURCES
+     Core VisiData includes these sources:
+
+        tsv (tab-separated value)
+           Plain and simple. VisiData writes tsv format by default. See the
+           --tsv-delimiter option.
+
+        csv (comma-separated value)
+           .csv files are a scourge upon the earth, and still regrettably
+           common.
+           See the --csv-dialect, --csv-delimiter, --csv-quotechar, and
+           --csv-skipinitialspace options.
+           Accepted dialects are excel-tab, unix, and excel.
+
+        fixed (fixed width text)
+           Columns are autodetected from the first 1000 rows (adjustable with
+           --fixed-rows).
+
+        json (single object) and jsonl/ndjson/ldjson (one object per line).
+           Cells containing lists (e.g. [3]) or dicts ({3}) can be expanded
+           into new columns with ( and unexpanded with ).
+
+        sqlite
+           May include multiple tables. The initial sheet is the table
+           directory; Enter loads the entire table into memory. z^S saves
+           modifications to source.
+
+     URL schemes are also supported:
+        http (requires requests); can be used as transport for another
+        filetype
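+
+        For example (hypothetical URL): vd -f csv https://example.com/data.csv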
+
+     For a list of all remaining formats supported by VisiData, see
+     https://visidata.org/formats.
+
+     In addition, .zip, .gz, .bz2, .xz, .zstd, and .zst files are decompressed
+     on the fly.
+
+AUTHOR
+     VisiData was made by Saul Pwanson <vd@saul.pw>.
+
+Linux/MacOS                    January 11, 2023                    Linux/MacOS
\ No newline at end of file
diff --git a/docs/console/extract.html b/docs/console/extract.html
new file mode 100644
index 0000000000..4568333b73
--- /dev/null
+++ b/docs/console/extract.html
@@ -0,0 +1,3528 @@
Extract | Frictionless Framework

Extract

With the Frictionless extract command you can extract data from a file or a dataset.

Normal Mode

By default, it outputs the extracted data visually formatted:

frictionless extract tables/*.csv

─────────────────────────────────── Dataset ────────────────────────────────────
+               dataset
+┏━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓
+┃ name   ┃ type  ┃ path              ┃
+┡━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩
+│ chunk1 │ table │ tables/chunk1.csv │
+│ chunk2 │ table │ tables/chunk2.csv │
+└────────┴───────┴───────────────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+     chunk1
+┏━━━━┳━━━━━━━━━┓
+┃ id ┃ name    ┃
+┡━━━━╇━━━━━━━━━┩
+│ 1  │ english │
+└────┴─────────┘
+    chunk2
+┏━━━━┳━━━━━━━━┓
+┃ id ┃ name   ┃
+┡━━━━╇━━━━━━━━┩
+│ 2  │ 中国人 │
+└────┴────────┘

YAML/JSON Mode

It's possible to output as YAML or JSON, for example:

frictionless extract tables/*.csv --yaml

chunk1:
+- id: 1
+  name: english
+chunk2:
+- id: 2
+  name: 中国人
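
A rough Python equivalent of the command above (a sketch; it assumes the extract helper accepts the same glob source as the console command):

from frictionless import extract

# extract returns a dict keyed by resource name when given a multi-file dataset
rows = extract("tables/*.csv")
for name, table in rows.items():
    print(name, table)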
\ No newline at end of file
diff --git a/docs/console/index.html b/docs/console/index.html
new file mode 100644
index 0000000000..509f42538d
--- /dev/null
+++ b/docs/console/index.html
@@ -0,0 +1,3589 @@
Index | Frictionless Framework

Index

Indexing a resource, in Frictionless terms, means loading a data table into a database. Let's explore how this feature works in different modes.

Installation

pip install frictionless[sql]

Normal Mode

This mode is supported for any database that is supported by sqlalchemy. Under the hood, Frictionless will infer Table Schema and populate the data table as it normally reads data. This means that type errors are replaced by null values, and in general the process is guaranteed to finish successfully, even for highly invalid data.

frictionless index table.csv --database sqlite:///index/project.db
frictionless extract sqlite:///index/project.db --table table --json

──────────────────────────────────── Index ─────────────────────────────────────
+
+[table] Indexed 3 rows in 0.245 seconds
+──────────────────────────────────── Result ────────────────────────────────────
+Succesefully indexed 1 tables
+{
+  "project": [
+    {
+      "id": 1,
+      "name": "english"
+    },
+    {
+      "id": 2,
+      "name": "中国人"
+    }
+  ]
+}
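
To double-check the indexed table outside Frictionless, a minimal sketch using only the Python standard library (the database path and table name match the command above):

import sqlite3

# the command above wrote to index/project.db; "table" must be quoted
# in SQL because it is a keyword
con = sqlite3.connect("index/project.db")
print(con.execute('SELECT * FROM "table"').fetchall())
con.close()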

Fast Mode

Fast mode is supported for SQLite and Postgresql databases. It infers Table Schema from a data sample and indexes the data using COPY in Postgresql and .import in SQLite. For big data files this mode is 10-30x faster than normal indexing, but the speed comes at a price: if the data is invalid, indexing will fail.

frictionless index table.csv --database sqlite:///index/project.db --fast
frictionless extract sqlite:///index/project.db --table table --json

──────────────────────────────────── Index ─────────────────────────────────────
+
+[table] Indexed 30 bytes in 0.255 seconds
+──────────────────────────────────── Result ────────────────────────────────────
+Succesefully indexed 1 tables
+{
+  "project": [
+    {
+      "id": 1,
+      "name": "english"
+    },
+    {
+      "id": 2,
+      "name": "中国人"
+    }
+  ]
+}

Solution 1: Fallback

To ensure that the data will be successfully indexed, it's possible to use the fallback option. If fast indexing fails, Frictionless starts over in normal mode and finishes the process successfully.

frictionless index table.csv --database sqlite:///index/project.db --name table --fast --fallback

Solution 2: QSV

Another option is to provide a path to the QSV binary. In this case, the initial schema inference is based on the whole data file, which guarantees that the table is type-wise valid:

frictionless index table.csv --database sqlite:///index/project.db --name table --fast --qsv qsv_path
\ No newline at end of file
diff --git a/docs/console/list.html b/docs/console/list.html
new file mode 100644
index 0000000000..65e06495a1
--- /dev/null
+++ b/docs/console/list.html
@@ -0,0 +1,3499 @@
List | Frictionless Framework

List

With the Frictionless list command you can get a list of resources from a data source. For more detailed output, see the describe command.

Normal Mode

By default, it outputs metadata visually formatted:

frictionless list tables/*.csv

─────────────────────────────────── Dataset ────────────────────────────────────
+               dataset
+┏━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓
+┃ name   ┃ type  ┃ path              ┃
+┡━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩
+│ chunk1 │ table │ tables/chunk1.csv │
+│ chunk2 │ table │ tables/chunk2.csv │
+└────────┴───────┴───────────────────┘

YAML/JSON Mode

It's possible to output as YAML or JSON, for example:

frictionless list tables/*.csv --yaml

- name: chunk1
+  type: table
+  path: tables/chunk1.csv
+  scheme: file
+  format: csv
+  mediatype: text/csv
+- name: chunk2
+  type: table
+  path: tables/chunk2.csv
+  scheme: file
+  format: csv
+  mediatype: text/csv
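
A comparable listing can be produced in Python; this is a sketch using the standard glob module plus the Resource class (attribute names as of Frictionless v5):

from glob import glob
from frictionless import Resource

# infer metadata for each file and print the same name/path/mediatype triple
for path in sorted(glob("tables/*.csv")):
    resource = Resource(path)
    resource.infer()
    print(resource.name, resource.path, resource.mediatype)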
\ No newline at end of file
diff --git a/docs/console/overview.html b/docs/console/overview.html
new file mode 100644
index 0000000000..6309271aa8
--- /dev/null
+++ b/docs/console/overview.html
@@ -0,0 +1,3550 @@
Overview | Frictionless Framework

Overview

The command-line interface is a vital part of the Frictionless Framework. While working within Python provides more flexibility, the CLI is the easiest way to interact with Frictionless.

Install

To install the package, please follow the Getting Started guide. Usually, a simple installation using Pip or Anaconda installs the frictionless binary on your computer, so you don't need to install the CLI additionally.

Commands

The frictionless binary requires a command like describe or validate:

frictionless describe # to describe your data
+frictionless explore # to explore your data
+frictionless extract # to extract your data
+frictionless index # to index your data
+frictionless list # to list your data
+frictionless publish # to publish your data
+frictionless query # to query your data
+frictionless script # to script your data
+frictionless validate # to validate your data
+frictionless --help # to get list of the command
+frictionless --version # to get the version

Arguments

All the arguments for the main CLI command are the same as in Python. You can read the Guides and use almost all of that information on the command line. There is an important difference in how arguments are written (note the dashes):

Python: validate('data/table.csv', limit_errors=1)
CLI: $ validate data/table.csv --limit-errors 1
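
The same pairing written out as a runnable sketch (the function and option come straight from the comparison above):

from frictionless import validate

# Python keyword arguments use underscores where the console uses dashes
report = validate("data/table.csv", limit_errors=1)
print(report.valid)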

To get help for a command and its arguments, use the --help flag with the command:

frictionless describe --help # to get help for describe
+frictionless extract --help # to get help for extract
+frictionless validate --help # to get help for validate
+frictionless transform --help # to get help for transform

Outputs

Usually, Frictionless commands return pretty-formatted tabular data, as extract and validate do. The describe command returns metadata, and you can choose the format in which to return it:

frictionless describe # default YAML with a commented front-matter
+frictionless describe --yaml # standard YAML
+frictionless describe --json # standard JSON

Errors

The Frictionless CLI should not fail with internal Python errors and a traceback (a long listing of related code). If you see one, please create an issue in the Issue Tracker.

Debug

To debug a problem, please use:

frictionless command --debug
\ No newline at end of file
diff --git a/docs/console/publish.html b/docs/console/publish.html
new file mode 100644
index 0000000000..dbaa1c1f9f
--- /dev/null
+++ b/docs/console/publish.html
@@ -0,0 +1,3474 @@
Publish | Frictionless Framework
+ + + +
+ +
+
+
+
+ +

Publish

+
+ +

With the publish command you can publish your dataset to a data publishing platform like CKAN:

+
frictionless publish data/tables/*.csv --target http://ckan:5000/dataset/my-best --title "My best dataset"
+
+

It will ask for an API Key to upload your metadata and data. As a result:

+
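For reference, a roughly equivalent Python sketch uses the CKAN portal plugin. This is only a sketch: the control parameter names (baseurl, apikey) and the target URL are illustrative assumptions, not the definitive API:

from frictionless import Package, portals
+
+# hypothetical values; parameter names are illustrative
+control = portals.CkanControl(baseurl='http://ckan:5000', apikey='<your-api-key>')
+package = Package('data/tables/*.csv')
+package.publish(control=control)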
+ +
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/console/query.html b/docs/console/query.html new file mode 100644 index 0000000000..c3307ff027 --- /dev/null +++ b/docs/console/query.html @@ -0,0 +1,3489 @@ + + + + + + + + +Query | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Query

+
+ +

With the query command you can explore tabular files within a SQLite database.

+

Installation

+ +
+
+
pip install frictionless[sql]
+pip install frictionless[sql,zenodo] # for examples in this tutorial
+
+ +
+

Usage

+
frictionless query https://zenodo.org/record/3977957
+
+
+ +
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/console/script.html b/docs/console/script.html new file mode 100644 index 0000000000..a009c56308 --- /dev/null +++ b/docs/console/script.html @@ -0,0 +1,3489 @@ + + + + + + + + +Script | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Script

+
+ +

With the script command you can explore tabular files with Pandas using a single console command.

+

Installation

+ +
+
+
pip install frictionless[sql]
+pip install frictionless[sql,zenodo] # for examples in this tutorial
+
+ +
+

Usage

+
frictionless script https://zenodo.org/record/3977957
+
+
+ +
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/console/validate.html b/docs/console/validate.html new file mode 100644 index 0000000000..87e0926a03 --- /dev/null +++ b/docs/console/validate.html @@ -0,0 +1,3512 @@ + + + + + + + + +Validate | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Validate

+
+ +

With the validate command you can validate your tabular files (individually or as a whole dataset). For example:

+ + + +
+
+
frictionless validate table.csv invalid.csv
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+                  dataset
+┏━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┓
+┃ name    ┃ type  ┃ path        ┃ status  ┃
+┡━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━┩
+│ table   │ table │ table.csv   │ VALID   │
+│ invalid │ table │ invalid.csv │ INVALID │
+└─────────┴───────┴─────────────┴─────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+                                    invalid
+┏━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
+┃ Row  ┃ Field ┃ Type            ┃ Message                                     ┃
+┡━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
+│ None │ 3     │ blank-label     │ Label in the header in field at position    │
+│      │       │                 │ "3" is blank                                │
+│ None │ 4     │ duplicate-label │ Label "name" in the header at position "4"  │
+│      │       │                 │ is duplicated to a label: at position "2"   │
+│ 2    │ 3     │ missing-cell    │ Row at position "2" has a missing cell in   │
+│      │       │                 │ field "field3" at position "3"              │
+│ 2    │ 4     │ missing-cell    │ Row at position "2" has a missing cell in   │
+│      │       │                 │ field "name2" at position "4"               │
+│ 3    │ 3     │ missing-cell    │ Row at position "3" has a missing cell in   │
+│      │       │                 │ field "field3" at position "3"              │
+│ 3    │ 4     │ missing-cell    │ Row at position "3" has a missing cell in   │
+│      │       │                 │ field "name2" at position "4"               │
+│ 4    │ None  │ blank-row       │ Row at position "4" is completely blank     │
+│ 5    │ 5     │ extra-cell      │ Row at position "5" has an extra value in   │
+│      │       │                 │ field at position "5"                       │
+└──────┴───────┴─────────────────┴─────────────────────────────────────────────┘
+ +
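The same validation can be run from Python. A minimal sketch, assuming the same files exist locally:

from frictionless import validate
+
+report = validate('invalid.csv')
+print(report.valid)
+print(report.flatten(['rowNumber', 'fieldNumber', 'type']))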
+
+
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/errors/cell.html b/docs/errors/cell.html new file mode 100644 index 0000000000..aadcb2cbb8 --- /dev/null +++ b/docs/errors/cell.html @@ -0,0 +1,3856 @@ + + + + + + + + +Cell Errors | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Cell Errors

+

Cell Error

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typecell-error
TitleCell Error
DescriptionCell Error
TemplateCell Error
Tags#table #row #cell

Extra Cell

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typeextra-cell
TitleExtra Cell
DescriptionThis row has more values compared to the header row (the first row in the data source). A key concept is that all the rows in tabular data must have the same number of columns.
TemplateRow at position "{rowNumber}" has an extra value in field at position "{fieldNumber}"
Tags#table #row #cell
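A minimal sketch that triggers this error with inline data (the data row carries one value more than the header defines):

from frictionless import validate
+
+# the data row has an extra third value compared to the two-label header
+report = validate([['id', 'name'], [1, 'english', 'extra']])
+print(report.flatten(['rowNumber', 'fieldNumber', 'type']))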

Missing Cell

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typemissing-cell
TitleMissing Cell
DescriptionThis row has fewer values compared to the header row (the first row in the data source). A key concept is that all the rows in tabular data must have the same number of columns.
TemplateRow at position "{rowNumber}" has a missing cell in field "{fieldName}" at position "{fieldNumber}"
Tags#table #row #cell

Type Error

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typetype-error
TitleType Error
DescriptionThe value does not match the schema type and format for this field.
TemplateType error in the cell "{cell}" in row "{rowNumber}" and field "{fieldName}" at position "{fieldNumber}": {note}
Tags#table #row #cell

Constraint Error

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typeconstraint-error
TitleConstraint Error
DescriptionA field value does not conform to a constraint.
TemplateThe cell "{cell}" in row at position "{rowNumber}" and field "{fieldName}" at position "{fieldNumber}" does not conform to a constraint: {note}
Tags#table #row #cell

Unique Error

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typeunique-error
TitleUnique Error
DescriptionThis field is a unique field but it contains a value that has been used in another row.
TemplateRow at position "{rowNumber}" has unique constraint violation in field "{fieldName}" at position "{fieldNumber}": {note}
Tags#table #row #cell

Truncated Value

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typetruncated-value
TitleTruncated Value
DescriptionThe value is possibly truncated.
TemplateThe cell {cell} in row at position {rowNumber} and field {fieldName} at position {fieldNumber} has an error: {note}
Tags#table #row #cell

Forbidden Value

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typeforbidden-value
TitleForbidden Value
DescriptionThe value is forbidden.
TemplateThe cell {cell} in row at position {rowNumber} and field {fieldName} at position {fieldNumber} has an error: {note}
Tags#table #row #cell

Sequential Value

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typesequential-value
TitleSequential Value
DescriptionThe value is not sequential.
TemplateThe cell {cell} in row at position {rowNumber} and field {fieldName} at position {fieldNumber} has an error: {note}
Tags#table #row #cell

Ascii Value

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typeascii-value
TitleAscii Value
DescriptionThe cell contains non-ascii characters.
TemplateThe cell {cell} in row at position {rowNumber} and field {fieldName} at position {fieldNumber} has an error: {note}
Tags#table #row #cell

Reference

+
+ + +
+
+ +

errors.CellError (class)

+ +
+
+ + +
+

errors.CellError (class)

+

Cell error representation. + +A base class for all the errors related to the cell value.

+

Signature

+

(*, note: str, cells: List[str], row_number: int, cell: str, field_name: str, field_number: int) -> None

+

Parameters

+
    +
  • + note + (str)
  • +
  • + cells + (List[str])
  • +
  • + row_number + (int)
  • +
  • + cell + (str)
  • +
  • + field_name + (str)
  • +
  • + field_number + (int)
  • +
+
+ +
+

errors.cellError.cell (property)

+

+ Cell where the error occurred. +

+

Signature

+

str

+
+
+

errors.cellError.field_name (property)

+

+ Name of the field that has an error. +

+

Signature

+

str

+
+
+

errors.cellError.field_number (property)

+

+ Index of the field that has an error. +

+

Signature

+

int

+
+ + +
+

errors.CellError.from_row (method) (static)

+

Create an error from a cell

+

Signature

+

(row: Row, *, note: str, field_name: str)

+

Parameters

+
    +
  • + row + (Row): row
  • +
  • + note + (str): note
  • +
  • + field_name + (str): field name
  • +
+
+ + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/errors/data.html b/docs/errors/data.html new file mode 100644 index 0000000000..85a50cdaca --- /dev/null +++ b/docs/errors/data.html @@ -0,0 +1,3535 @@ + + + + + + + + +Data Errors | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Data Errors

+

Data Error

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typedata-error
TitleData Error
DescriptionThere is a data error.
TemplateData error: {note}

Reference

+
+ + +
+
+ +

errors.DataError (class)

+ +
+
+ + +
+

errors.DataError (class)

+

Error representation. + +It is a baseclass from which other subclasses of errors are inherited or +derived from.

+

Signature

+

(*, note: str) -> None

+

Parameters

+
    +
  • + note + (str)
  • +
+
+ + + + + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/errors/file.html b/docs/errors/file.html new file mode 100644 index 0000000000..d7acac881f --- /dev/null +++ b/docs/errors/file.html @@ -0,0 +1,3597 @@ + + + + + + + + +File Errors | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

File Errors

+

File Error

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typefile-error
TitleFile Error
DescriptionThere is a file error.
TemplateGeneral file error: {note}
Tags#file

Hash Count Error

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typehash-count
TitleHash Count Error
DescriptionThis error can happen if the data is corrupted.
TemplateThe data source does not match the expected hash count: {note}
Tags#file

Byte Count Error

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typebyte-count
TitleByte Count Error
DescriptionThis error can happen if the data is corrupted.
TemplateThe data source does not match the expected byte count: {note}
Tags#file

Reference

+
+ + +
+
+ +

errors.FileError (class)

+ +
+
+ + +
+

errors.FileError (class)

+

Error representation. + +It is a baseclass from which other subclasses of errors are inherited or +derived from.

+

Signature

+

(*, note: str) -> None

+

Parameters

+
    +
  • + note + (str)
  • +
+
+ + + + + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/errors/header.html b/docs/errors/header.html new file mode 100644 index 0000000000..cb19f5fdef --- /dev/null +++ b/docs/errors/header.html @@ -0,0 +1,3589 @@ + + + + + + + + +Header Errors | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Header Errors

+

Header Error

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typeheader-error
TitleHeader Error
DescriptionHeader Error
TemplateHeader Error
Tags#table #header

Blank Header

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typeblank-header
TitleBlank Header
DescriptionThis header is empty. A header should contain at least one value.
TemplateHeader is completely blank
Tags#table #header

Reference

+
+ + +
+
+ +

errors.HeaderError (class)

+ +
+
+ + +
+

errors.HeaderError (class)

+

Header error representation. + +A base class for all the errors related to the resource header.

+

Signature

+

(*, note: str, labels: List[str], row_numbers: List[int]) -> None

+

Parameters

+
    +
  • + note + (str)
  • +
  • + labels + (List[str])
  • +
  • + row_numbers + (List[int])
  • +
+
+ +
+

errors.headerError.labels (property)

+

+ List of labels that have errors. +

+

Signature

+

List[str]

+
+
+

errors.headerError.row_numbers (property)

+

+ Row numbers where the error occurred. +

+

Signature

+

List[int]

+
+ + + + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/errors/label.html b/docs/errors/label.html new file mode 100644 index 0000000000..a706585983 --- /dev/null +++ b/docs/errors/label.html @@ -0,0 +1,3722 @@ + + + + + + + + +Label Errors | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Label Errors

+

Label Error

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typelabel-error
TitleLabel Error
DescriptionLabel Error
TemplateLabel Error
Tags#table #header #label

Extra Label

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typeextra-label
TitleExtra Label
DescriptionThe header of the data source contains a label that does not exist in the provided schema.
TemplateThere is an extra label "{label}" in header at position "{fieldNumber}"
Tags#table #header #label

Missing Label

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typemissing-label
TitleMissing Label
DescriptionBased on the schema, a label that should be present is missing from the data's header.
TemplateThere is a missing label in the header's field "{fieldName}" at position "{fieldNumber}"
Tags#table #header #label

Blank Label

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typeblank-label
TitleBlank Label
DescriptionA label in the header row is missing a value. Label should be provided and not be blank.
TemplateLabel in the header in field at position "{fieldNumber}" is blank
Tags#table #header #label

Duplicate Label

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typeduplicate-label
TitleDuplicate Label
DescriptionTwo columns in the header row have the same value. Column names should be unique.
TemplateLabel "{label}" in the header at position "{fieldNumber}" is duplicated to a label: {note}
Tags#table #header #label
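A minimal sketch that triggers this error with inline data (two header labels share the same name):

from frictionless import validate
+
+# both header labels are "name", which is reported as a duplicate-label error
+report = validate([['name', 'name'], ['value1', 'value2']])
+print(report.flatten(['fieldNumber', 'type']))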

Incorrect Label

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typeincorrect-label
TitleIncorrect Label
DescriptionOne of the data source's header labels does not match the field name defined in the schema.
TemplateLabel "{label}" in field {fieldName} at position "{fieldNumber}" does not match the field name in the schema
Tags#table #header #label

Reference

+
+ + +
+
+ +

errors.LabelError (class)

+ +
+
+ + +
+

errors.LabelError (class)

+

Label error representation. + +A base class for all the errors related to the labels of the columns/fields.

+

Signature

+

(*, note: str, labels: List[str], row_numbers: List[int], label: str, field_name: str, field_number: int) -> None

+

Parameters

+
    +
  • + note + (str)
  • +
  • + labels + (List[str])
  • +
  • + row_numbers + (List[int])
  • +
  • + label + (str)
  • +
  • + field_name + (str)
  • +
  • + field_number + (int)
  • +
+
+ +
+

errors.labelError.label (property)

+

+ Label of the field that has an error. +

+

Signature

+

str

+
+
+

errors.labelError.field_name (property)

+

+ Name of the field that has an error. +

+

Signature

+

str

+
+
+

errors.labelError.field_number (property)

+

+ Index of the field that has an error. +

+

Signature

+

int

+
+ + + + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/errors/metadata.html b/docs/errors/metadata.html new file mode 100644 index 0000000000..e0327b7857 --- /dev/null +++ b/docs/errors/metadata.html @@ -0,0 +1,3960 @@ + + + + + + + + +Metadata Errors | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Metadata Errors

+

Metadata Error

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typemetadata-error
TitleMetadata Error
DescriptionThere is a metadata error.
TemplateMetadata error: {note}

Catalog Error

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typecatalog-error
TitleCatalog Error
DescriptionA validation cannot be processed.
TemplateThe data catalog has an error: {note}

Dataset Error

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typedataset-error
TitleDataset Error
DescriptionA validation cannot be processed.
TemplateThe dataset has an error: {note}

Checklist Error

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typechecklist-error
TitleChecklist Error
DescriptionProvided checklist is not valid.
TemplateChecklist is not valid: {note}

Check Error

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typecheck-error
TitleCheck Error
DescriptionProvided check is not valid
TemplateCheck is not valid: {note}

Detector Error

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typedetector-error
TitleDetector Error
DescriptionProvided detector is not valid.
TemplateDetector is not valid: {note}

Dialect Error

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typedialect-error
TitleDialect Error
DescriptionProvided dialect is not valid.
TemplateDialect is not valid: {note}

Control Error

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typecontrol-error
TitleControl Error
DescriptionProvided control is not valid.
TemplateControl is not valid: {note}

Inquiry Error

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typeinquiry-error
TitleInquiry Error
DescriptionProvided inquiry is not valid.
TemplateInquiry is not valid: {note}

Inquiry Task Error

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typeinquiry-task-error
TitleInquiry Task Error
DescriptionProvided inquiry task is not valid.
TemplateInquiry task is not valid: {note}

Package Error

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typepackage-error
TitlePackage Error
DescriptionA validation cannot be processed.
TemplateThe data package has an error: {note}

Pipeline Error

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typepipeline-error
TitlePipeline Error
DescriptionProvided pipeline is not valid.
TemplatePipeline is not valid: {note}

Step Error

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typestep-error
TitleStep Error
DescriptionProvided step is not valid
TemplateStep is not valid: {note}

Report Error

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typereport-error
TitleReport Error
DescriptionProvided report is not valid.
TemplateReport is not valid: {note}

Report Task Error

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typereport-task-error
TitleReport Task Error
DescriptionProvided report task is not valid.
TemplateReport task is not valid: {note}

Schema Error

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typeschema-error
TitleSchema Error
DescriptionProvided schema is not valid.
TemplateSchema is not valid: {note}
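A minimal sketch that surfaces this error by building a schema from an invalid descriptor (assuming descriptor validation raises eagerly, as it does in recent versions):

from frictionless import Schema, FrictionlessException
+
+try:
+    # "fields" must be a list of field descriptors, not a string
+    Schema.from_descriptor({'fields': 'bad'})
+except FrictionlessException as exception:
+    print(exception.error.type)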

Field Error

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typefield-error
TitleField Error
DescriptionProvided field is not valid.
TemplateField is not valid: {note}

Stats Error

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typestats-error
TitleStats Error
DescriptionStats object has an error.
TemplateStats object has an error: {note}

Reference

+
+ + +
+
+ +

errors.MetadataError (class)

+ +
+
+ + +
+

errors.MetadataError (class)

+

Error representation. + +It is a baseclass from which other subclasses of errors are inherited or +derived from.

+

Signature

+

(*, note: str) -> None

+

Parameters

+
    +
  • + note + (str)
  • +
+
+ + + + + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/errors/resource.html b/docs/errors/resource.html new file mode 100644 index 0000000000..021d7bad65 --- /dev/null +++ b/docs/errors/resource.html @@ -0,0 +1,3660 @@ + + + + + + + + +Resource Errors | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Resource Errors

+

Resource Error

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typeresource-error
TitleResource Error
DescriptionA validation cannot be processed.
TemplateThe data resource has an error: {note}

Source Error

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typesource-error
TitleSource Error
DescriptionData reading error because of not supported or inconsistent contents.
TemplateThe data source has not supported or has inconsistent contents: {note}

Scheme Error

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typescheme-error
TitleScheme Error
DescriptionData reading error because of incorrect scheme.
TemplateThe data source could not be successfully loaded: {note}

Format Error

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typeformat-error
TitleFormat Error
DescriptionData reading error because of incorrect format.
TemplateThe data source could not be successfully parsed: {note}

Encoding Error

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typeencoding-error
TitleEncoding Error
DescriptionData reading error because of an encoding problem.
TemplateThe data source could not be successfully decoded: {note}

Compression Error

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typecompression-error
TitleCompression Error
DescriptionData reading error because of a decompression problem.
TemplateThe data source could not be successfully decompressed: {note}

Reference

+
+ + +
+
+ +

errors.ResourceError (class)

+ +
+
+ + +
+

errors.ResourceError (class)

+

Error representation. + +It is a baseclass from which other subclasses of errors are inherited or +derived from.

+

Signature

+

(*, note: str) -> None

+

Parameters

+
    +
  • + note + (str)
  • +
+
+ + + + + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/errors/row.html b/docs/errors/row.html new file mode 100644 index 0000000000..c31dc66ddf --- /dev/null +++ b/docs/errors/row.html @@ -0,0 +1,3819 @@ + + + + + + + + +Row Errors | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Row Errors

+

Row Error

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typerow-error
TitleRow Error
DescriptionRow Error
TemplateRow Error
Tags#table #row

Blank Row

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typeblank-row
TitleBlank Row
DescriptionThis row is empty. A row should contain at least one value.
TemplateRow at position "{rowNumber}" is completely blank
Tags#table #row
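A minimal sketch that triggers this error with inline data (the middle data row contains only empty cells):

from frictionless import validate
+
+report = validate([['id', 'name'], [1, 'english'], ['', ''], [2, 'german']])
+print(report.flatten(['rowNumber', 'type']))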

PrimaryKey Error

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typeprimary-key
TitlePrimaryKey Error
DescriptionValues in the primary key fields should be unique for every row
TemplateRow at position "{rowNumber}" violates the primary key: {note}
Tags#table #row

ForeignKey Error

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typeforeign-key
TitleForeignKey Error
DescriptionValues in the foreign key fields should exist in the reference table
TemplateRow at position "{rowNumber}" violates the foreign key: {note}
Tags#table #row

Duplicate Row

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typeduplicate-row
TitleDuplicate Row
DescriptionThe row is duplicated.
TemplateRow at position {rowNumber} is duplicated: {note}
Tags#table #row

Row Constraint

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typerow-constraint
TitleRow Constraint
DescriptionThe value does not conform to the row constraint.
TemplateThe row at position {rowNumber} has an error: {note}
Tags#table #row

Reference

+
+ + +
+
+ +

errors.RowError (class)

+

errors.ForeignKeyError (class)

+ +
+
+ + +
+

errors.RowError (class)

+

Row error representation. + +A base class for all the errors related to a row of the +tabular data.

+

Signature

+

(*, note: str, cells: List[str], row_number: int) -> None

+

Parameters

+
    +
  • + note + (str)
  • +
  • + cells + (List[str])
  • +
  • + row_number + (int)
  • +
+
+ +
+

errors.rowError.cells (property)

+

+ Values of all the cells in the row that has an error. +

+

Signature

+

List[str]

+
+
+

errors.rowError.row_number (property)

+

+ Index of the row that has an error. +

+

Signature

+

int

+
+ + +
+

errors.RowError.from_row (method) (static)

+

Create an error from a row

+

Signature

+

(row: Row, *, note: str)

+

Parameters

+
    +
  • + row + (Row)
  • +
  • + note + (str)
  • +
+
+ + +
+

errors.ForeignKeyError (class)

+

Row error representation. + +A base class for all the errors related to a row of the +tabular data.

+

Signature

+

(*, note: str, cells: List[str], row_number: int, field_names: List[str], field_cells: List[str], reference_name: str, reference_field_names: List[str]) -> None

+

Parameters

+
    +
  • + note + (str)
  • +
  • + cells + (List[str])
  • +
  • + row_number + (int)
  • +
  • + field_names + (List[str])
  • +
  • + field_cells + (List[str])
  • +
  • + reference_name + (str)
  • +
  • + reference_field_names + (List[str])
  • +
+
+ +
+

errors.foreignKeyError.field_names (property)

+

+ Keys in the resource target column. +

+

Signature

+

List[str]

+
+
+

errors.foreignKeyError.field_cells (property)

+

+ Cells not found in the lookup table. +

+

Signature

+

List[str]

+
+
+

errors.foreignKeyError.reference_name (property)

+

+ Name of the lookup table the keys were searched on. +

+

Signature

+

str

+
+
+

errors.foreignKeyError.reference_field_names (property)

+

+ Key names in the lookup table defined as foreign keys in the resource. +

+

Signature

+

List[str]

+
+ + +
+

errors.ForeignKeyError.from_row (method) (static)

+

Create a foreign-key error from a row

+

Signature

+

(row: Row, *, note: str, field_names: List[str], field_values: List[Any], reference_name: str, reference_field_names: List[str])

+

Parameters

+
    +
  • + row + (Row)
  • +
  • + note + (str)
  • +
  • + field_names + (List[str])
  • +
  • + field_values + (List[Any])
  • +
  • + reference_name + (str)
  • +
  • + reference_field_names + (List[str])
  • +
+
+ + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/errors/table.html b/docs/errors/table.html new file mode 100644 index 0000000000..b2d678bf89 --- /dev/null +++ b/docs/errors/table.html @@ -0,0 +1,3713 @@ + + + + + + + + +Table Errors | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Table Errors

+

Table Error

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typetable-error
TitleTable Error
DescriptionThere is a table error.
TemplateGeneral table error: {note}
Tags#table

Field Count Error

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typefield-count
TitleField Count Error
DescriptionThis error can happen if the data is corrupted.
TemplateThe data source does not match the expected field count: {note}
Tags#table

Row Count Error

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typerow-count
TitleRow Count Error
DescriptionThis error can happen if the data is corrupted.
TemplateThe data source does not match the expected row count: {note}
Tags#table

Table dimensions error

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typetable-dimensions
TitleTable dimensions error
DescriptionThis error can happen if the data is corrupted.
TemplateThe data source does not have the required dimensions: {note}
Tags#table
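A minimal sketch using the table_dimensions check; the num_rows argument follows the checks API, and the exact names should be treated as an assumption:

from frictionless import validate, checks
+
+# expect exactly 3 data rows; this source has 2, so a table-dimensions error is reported
+report = validate(
+    [['id', 'name'], [1, 'english'], [2, 'german']],
+    checks=[checks.table_dimensions(num_rows=3)],
+)
+print(report.flatten(['type', 'message']))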

Deviated Value

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typedeviated-value
TitleDeviated Value
DescriptionThe value is deviated.
TemplateThere is a possible error because the value is deviated: {note}
Tags#table

Deviated cell

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typedeviated-cell
TitleDeviated cell
DescriptionThe cell is deviated.
TemplateThere is a possible error because the cell is deviated: {note}
Tags#table

Required Value

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameValue
Typerequired-value
TitleRequired Value
DescriptionThe required values are missing.
TemplateRequired values not found: {note}
Tags#table

Reference

+
+ + +
+
+ +

errors.TableError (class)

+ +
+
+ + +
+

errors.TableError (class)

+

Error representation. + +It is a baseclass from which other subclasses of errors are inherited or +derived from.

+

Signature

+

(*, note: str) -> None

+

Parameters

+
    +
  • + note + (str)
  • +
+
+ + + + + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/fields/any.html b/docs/fields/any.html new file mode 100644 index 0000000000..8afbceee12 --- /dev/null +++ b/docs/fields/any.html @@ -0,0 +1,3553 @@ + + + + + + + + +Any Field | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Any Field

+

Overview

+

AnyField provides the ability to skip cell parsing entirely. Read more in Table Schema Standard.

+

Example

+ +
+
+
from frictionless import Schema, extract, fields
+
+data = [['name'], [1], ['1']]
+rows = extract(data, schema=Schema(fields=[fields.AnyField(name='name')]))
+print(rows)
+
+ +
{'memory': [{'name': 1}, {'name': '1'}]}
+ +
+

Reference

+
+ + +
+
+ +

fields.AnyField (class)

+ +
+
+ + +
+

fields.AnyField (class)

+

Field representation

+

Signature

+

(*, name: str, title: Optional[str] = None, description: Optional[str] = None, format: str = default, missing_values: List[str] = NOTHING, constraints: Dict[str, Any] = NOTHING, rdf_type: Optional[str] = None, example: Optional[str] = None, schema: Optional[Schema] = None) -> None

+

Parameters

+
    +
  • + name + (str)
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + format + (str)
  • +
  • + missing_values + (List[str])
  • +
  • + constraints + (Dict[str, Any])
  • +
  • + rdf_type + (Optional[str])
  • +
  • + example + (Optional[str])
  • +
  • + schema + (Optional[Schema])
  • +
+
+ + + + + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/fields/array.html b/docs/fields/array.html new file mode 100644 index 0000000000..68bc76abe5 --- /dev/null +++ b/docs/fields/array.html @@ -0,0 +1,3565 @@ + + + + + + + + +Array Field | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Array Field

+

Overview

+

The field contains data that is a valid JSON array. Read more in Table Schema Standard.

+

Example

+ +
+
+
from frictionless import Schema, extract, fields
+
+data = [['name'], ['["value1", "value2"]']]
+rows = extract(data, schema=Schema(fields=[fields.ArrayField(name='name')]))
+print(rows)
+
+ +
{'memory': [{'name': ['value1', 'value2']}]}
+ +
+

Reference

+
+ + +
+
+ +

fields.ArrayField (class)

+ +
+
+ + +
+

fields.ArrayField (class)

+

Field representation

+

Signature

+

(*, name: str, title: Optional[str] = None, description: Optional[str] = None, format: str = default, missing_values: List[str] = NOTHING, constraints: Dict[str, Any] = NOTHING, rdf_type: Optional[str] = None, example: Optional[str] = None, schema: Optional[Schema] = None, array_item: Optional[Dict[str, Any]] = NOTHING) -> None

+

Parameters

+
    +
  • + name + (str)
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + format + (str)
  • +
  • + missing_values + (List[str])
  • +
  • + constraints + (Dict[str, Any])
  • +
  • + rdf_type + (Optional[str])
  • +
  • + example + (Optional[str])
  • +
  • + schema + (Optional[Schema])
  • +
  • + array_item + (Optional[Dict[str, Any]])
  • +
+
+ +
+

fields.arrayField.array_item (property)

+

+ A dictionary that specifies the type and other constraints for the + data that will be read in this data type field. +

+

Signature

+

Optional[Dict[str, Any]]

+
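A short sketch using arrayItem to cast the items of the parsed array; the integer item type here is an illustrative assumption:

from frictionless import Schema, extract, fields
+
+data = [['values'], ['[1, "2", 3]']]
+field = fields.ArrayField(name='values', array_item={'type': 'integer'})
+rows = extract(data, schema=Schema(fields=[field]))
+print(rows)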
+ + + + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/fields/boolean.html b/docs/fields/boolean.html new file mode 100644 index 0000000000..121b18210a --- /dev/null +++ b/docs/fields/boolean.html @@ -0,0 +1,3578 @@ + + + + + + + + +Boolean Field | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Boolean Field

+

Overview

+

The field contains boolean (true/false) data.

+

In the physical representations of data where boolean values are represented with strings, the values set in trueValues and falseValues are to be cast to their logical representation as booleans. trueValues and falseValues are arrays which can be customised to user need. The default values for these are in the additional properties section below. Read more in Table Schema Standard.

+

Example

+ +
+
+
from frictionless import Schema, extract, fields
+
+data = [['name'], ['true'], ['false']]
+rows = extract(data, schema=Schema(fields=[fields.BooleanField(name='name')]))
+print(rows)
+
+ +
{'memory': [{'name': True}, {'name': False}]}
+ +
+

Reference

+
+ + +
+
+ +

fields.BooleanField (class)

+ +
+
+ + +
+

fields.BooleanField (class)

+

Field representation

+

Signature

+

(*, name: str, title: Optional[str] = None, description: Optional[str] = None, format: str = default, missing_values: List[str] = NOTHING, constraints: Dict[str, Any] = NOTHING, rdf_type: Optional[str] = None, example: Optional[str] = None, schema: Optional[Schema] = None, true_values: List[str] = NOTHING, false_values: List[str] = NOTHING) -> None

+

Parameters

+
    +
  • + name + (str)
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + format + (str)
  • +
  • + missing_values + (List[str])
  • +
  • + constraints + (Dict[str, Any])
  • +
  • + rdf_type + (Optional[str])
  • +
  • + example + (Optional[str])
  • +
  • + schema + (Optional[Schema])
  • +
  • + true_values + (List[str])
  • +
  • + false_values + (List[str])
  • +
+
+ +
+

fields.booleanField.true_values (property)

+

+ It defines the values to be read as true values while reading data. The default + true values are ["true", "True", "TRUE", "1"]. +

+

Signature

+

List[str]

+
+
+

fields.booleanField.false_values (property)

+

+ It defines the values to be read as false values while reading data. The default false values are ["false", "False", "FALSE", "0"]. +

+

Signature

+

List[str]

+
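A short sketch customising trueValues and falseValues, following the example above:

from frictionless import Schema, extract, fields
+
+data = [['flag'], ['yes'], ['no']]
+field = fields.BooleanField(name='flag', true_values=['yes'], false_values=['no'])
+rows = extract(data, schema=Schema(fields=[field]))
+print(rows)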
+ + + + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/fields/date.html b/docs/fields/date.html new file mode 100644 index 0000000000..fde42c6f78 --- /dev/null +++ b/docs/fields/date.html @@ -0,0 +1,3553 @@ + + + + + + + + +Date Field | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Date Field

+

Overview

+

A date without a time (by default in ISO8601 format). Read more in Table Schema Standard.

+

Example

+ +
+
+
from frictionless import Schema, extract, fields
+
+data = [['name'], ['2022-08-22']]
+rows = extract(data, schema=Schema(fields=[fields.DateField(name='name')]))
+print(rows)
+
+ +
{'memory': [{'name': datetime.date(2022, 8, 22)}]}
+ +
+

Reference

+
+ + +
+
+ +

fields.DateField (class)

+ +
+
+ + +
+

fields.DateField (class)

+

Field representation

+

Signature

+

(*, name: str, title: Optional[str] = None, description: Optional[str] = None, format: str = default, missing_values: List[str] = NOTHING, constraints: Dict[str, Any] = NOTHING, rdf_type: Optional[str] = None, example: Optional[str] = None, schema: Optional[Schema] = None) -> None

+

Parameters

+
    +
  • + name + (str)
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + format + (str)
  • +
  • + missing_values + (List[str])
  • +
  • + constraints + (Dict[str, Any])
  • +
  • + rdf_type + (Optional[str])
  • +
  • + example + (Optional[str])
  • +
  • + schema + (Optional[Schema])
  • +
+
+ + + + + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/fields/datetime.html b/docs/fields/datetime.html new file mode 100644 index 0000000000..40505d50df --- /dev/null +++ b/docs/fields/datetime.html @@ -0,0 +1,3553 @@ + + + + + + + + +Datetime Field | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Datetime Field

+

Overview

+

A date with a time (by default in ISO8601 format). Read more in Table Schema Standard.

+

Example

+ +
+
+
from frictionless import Schema, extract, fields
+
+data = [['name'], ['2022-08-22T12:00:00']]
+rows = extract(data, schema=Schema(fields=[fields.DatetimeField(name='name')]))
+print(rows)
+
+ +
{'memory': [{'name': datetime.datetime(2022, 8, 22, 12, 0)}]}
+ +
+

Reference

+
+ + +
+
+ +

fields.DatetimeField (class)

+ +
+
+ + +
+

fields.DatetimeField (class)

+

Field representation

+

Signature

+

(*, name: str, title: Optional[str] = None, description: Optional[str] = None, format: str = default, missing_values: List[str] = NOTHING, constraints: Dict[str, Any] = NOTHING, rdf_type: Optional[str] = None, example: Optional[str] = None, schema: Optional[Schema] = None) -> None

+

Parameters

+
    +
  • + name + (str)
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + format + (str)
  • +
  • + missing_values + (List[str])
  • +
  • + constraints + (Dict[str, Any])
  • +
  • + rdf_type + (Optional[str])
  • +
  • + example + (Optional[str])
  • +
  • + schema + (Optional[Schema])
  • +
+
+ + + + + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/fields/duration.html b/docs/fields/duration.html new file mode 100644 index 0000000000..29112d1c1a --- /dev/null +++ b/docs/fields/duration.html @@ -0,0 +1,3554 @@ + + + + + + + + +Duration Field | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Duration Field

+

Overview

+

A duration of time. We follow the definition of XML Schema duration datatype directly +and that definition is implicitly inlined here. Read more in Table Schema Standard.

+

Example

+ +
+
+
from frictionless import Schema, extract, fields
+
+data = [['name'], ['P1Y']]
+rows = extract(data, schema=Schema(fields=[fields.DurationField(name='name')]))
+print(rows)
+
+ +
{'memory': [{'name': isodate.duration.Duration(0, 0, 0, years=1, months=0)}]}
+ +
+

Reference

+
+ + +
+
+ +

fields.DurationField (class)

+ +
+
+ + +
+

fields.DurationField (class)

+

Field representation

+

Signature

+

(*, name: str, title: Optional[str] = None, description: Optional[str] = None, format: str = default, missing_values: List[str] = NOTHING, constraints: Dict[str, Any] = NOTHING, rdf_type: Optional[str] = None, example: Optional[str] = None, schema: Optional[Schema] = None) -> None

+

Parameters

+
    +
  • + name + (str)
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + format + (str)
  • +
  • + missing_values + (List[str])
  • +
  • + constraints + (Dict[str, Any])
  • +
  • + rdf_type + (Optional[str])
  • +
  • + example + (Optional[str])
  • +
  • + schema + (Optional[Schema])
  • +
+
+ + + + + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/fields/geojson.html b/docs/fields/geojson.html new file mode 100644 index 0000000000..a4ed0b4966 --- /dev/null +++ b/docs/fields/geojson.html @@ -0,0 +1,3552 @@ + + + + + + + + +Geojson Field | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Geojson Field

+

The field contains a JSON object according to GeoJSON or TopoJSON spec. Read more in Table Schema Standard.

+

Example

+ +
+
+
from frictionless import Schema, extract, fields
+
+data = [['name'], ['{"geometry": null, "type": "Feature", "properties": {"k": "v"}}']]
+rows = extract(data, schema=Schema(fields=[fields.GeojsonField(name='name')]))
+print(rows)
+
+ +
{'memory': [{'name': {'geometry': None, 'type': 'Feature', 'properties': {'k': 'v'}}}]}
+ +
+

Reference

+
+ + +
+
+ +

fields.GeojsonField (class)

+ +
+
+ + +
+

fields.GeojsonField (class)

+

Field representation

+

Signature

+

(*, name: str, title: Optional[str] = None, description: Optional[str] = None, format: str = default, missing_values: List[str] = NOTHING, constraints: Dict[str, Any] = NOTHING, rdf_type: Optional[str] = None, example: Optional[str] = None, schema: Optional[Schema] = None) -> None

+

Parameters

+
    +
  • + name + (str)
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + format + (str)
  • +
  • + missing_values + (List[str])
  • +
  • + constraints + (Dict[str, Any])
  • +
  • + rdf_type + (Optional[str])
  • +
  • + example + (Optional[str])
  • +
  • + schema + (Optional[Schema])
  • +
+
+ + + + + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/fields/geopoint.html b/docs/fields/geopoint.html new file mode 100644 index 0000000000..f9edd1c1f6 --- /dev/null +++ b/docs/fields/geopoint.html @@ -0,0 +1,3552 @@ + + + + + + + + +Geopoint Field | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Geopoint Field

+

The field contains data describing a geographic point. Read more in Table Schema Standard.

+

Example

+ +
+
+
from frictionless import Schema, extract, fields
+
+data = [['name'], ["180, -90"]]
+rows = extract(data, schema=Schema(fields=[fields.GeopointField(name='name')]))
+print(rows)
+
+ +
{'memory': [{'name': [180.0, -90.0]}]}
+ +
+

Reference

+
+ + +
+
+ +

fields.GeopointField (class)

+ +
+
+ + +
+

fields.GeopointField (class)

+

Field representation

+

Signature

+

(*, name: str, title: Optional[str] = None, description: Optional[str] = None, format: str = default, missing_values: List[str] = NOTHING, constraints: Dict[str, Any] = NOTHING, rdf_type: Optional[str] = None, example: Optional[str] = None, schema: Optional[Schema] = None) -> None

+

Parameters

+
    +
  • + name + (str)
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + format + (str)
  • +
  • + missing_values + (List[str])
  • +
  • + constraints + (Dict[str, Any])
  • +
  • + rdf_type + (Optional[str])
  • +
  • + example + (Optional[str])
  • +
  • + schema + (Optional[Schema])
  • +
+
+ + + + + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/fields/integer.html b/docs/fields/integer.html new file mode 100644 index 0000000000..e5cb676dcb --- /dev/null +++ b/docs/fields/integer.html @@ -0,0 +1,3565 @@ + + + + + + + + +Integer Field | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Integer Field

+

The field contains integers, that is, whole numbers. Integer values are indicated in the standard way for any valid integer. Read more in Table Schema Standard.

+

Example

+ +
+
+
from frictionless import Schema, extract, fields
+
+data = [['name'], ['1'], ['2'], ['3']]
+rows = extract(data, schema=Schema(fields=[fields.IntegerField(name='name')]))
+print(rows)
+
+ +
{'memory': [{'name': 1}, {'name': 2}, {'name': 3}]}
+ +
+

Reference

+
+ + +
+
+ +

fields.IntegerField (class)

+ +
+
+ + +
+

fields.IntegerField (class)

+

Field representation

+

Signature

+

(*, name: str, title: Optional[str] = None, description: Optional[str] = None, format: str = default, missing_values: List[str] = NOTHING, constraints: Dict[str, Any] = NOTHING, rdf_type: Optional[str] = None, example: Optional[str] = None, schema: Optional[Schema] = None, bare_number: bool = True) -> None

+

Parameters

+
    +
  • + name + (str)
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + format + (str)
  • +
  • + missing_values + (List[str])
  • +
  • + constraints + (Dict[str, Any])
  • +
  • + rdf_type + (Optional[str])
  • +
  • + example + (Optional[str])
  • +
  • + schema + (Optional[Schema])
  • +
  • + bare_number + (bool)
  • +
+
+ +
+

fields.integerField.bare_number (property)

+

+ It specifies whether the value is a bare number. If true, the pattern that removes non-digit characters is not applied, and vice versa. The default value is True. +

+

Signature

+

bool

+
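A short sketch with bareNumber set to false, so surrounding non-digit characters are stripped before casting (following the example above):

from frictionless import Schema, extract, fields
+
+data = [['name'], ['$100']]
+field = fields.IntegerField(name='name', bare_number=False)
+rows = extract(data, schema=Schema(fields=[field]))
+print(rows)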
+ + + + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/fields/number.html b/docs/fields/number.html new file mode 100644 index 0000000000..8fac086ce5 --- /dev/null +++ b/docs/fields/number.html @@ -0,0 +1,3600 @@ + + + + + + + + +Number Field | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Number Field

+

Overview

+

The field contains numbers of any kind including decimals. Read more in Table Schema Standard.

+

Example

+ +
+
+
from frictionless import Schema, extract, fields
+
+data = [['name'], ['1.1'], ['2.2'], ['3.3']]
+rows = extract(data, schema=Schema(fields=[fields.NumberField(name='name')]))
+print(rows)
+
+ +
{'memory': [{'name': Decimal('1.1')}, {'name': Decimal('2.2')}, {'name': Decimal('3.3')}]}
+ +
+

Reference

+
+ + +
+
+ +

fields.NumberField (class)

+ +
+
+ + +
+

fields.NumberField (class)

+

Field representation

+

Signature

+

(*, name: str, title: Optional[str] = None, description: Optional[str] = None, format: str = default, missing_values: List[str] = NOTHING, constraints: Dict[str, Any] = NOTHING, rdf_type: Optional[str] = None, example: Optional[str] = None, schema: Optional[Schema] = None, bare_number: bool = True, float_number: bool = False, decimal_char: str = ., group_char: str = ) -> None

+

Parameters

+
    +
  • + name + (str)
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + format + (str)
  • +
  • + missing_values + (List[str])
  • +
  • + constraints + (Dict[str, Any])
  • +
  • + rdf_type + (Optional[str])
  • +
  • + example + (Optional[str])
  • +
  • + schema + (Optional[Schema])
  • +
  • + bare_number + (bool)
  • +
  • + float_number + (bool)
  • +
  • + decimal_char + (str)
  • +
  • + group_char + (str)
  • +
+
+ +
+

fields.numberField.bare_number (property)

+

+ It specifies whether the value is a bare number. If true, the pattern that removes non-digit characters is not applied, and vice versa. The default value is True. +

+

Signature

+

bool

+
+
+

fields.numberField.float_number (property)

+

+ It specifies that the value is a float number. +

+

Signature

+

bool

+
+
+

fields.numberField.decimal_char (property)

+

+ It specifies the character to be used as the decimal character. The default value is ".". Its values can be ".", "@", etc. +

+

Signature

+

str

+
+
+

fields.numberField.group_char (property)

+

+ It specifies the character to be used as the group character. The default value is "". It can take values such as ",", "#", etc. +

+

Signature

+

str

+
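A short sketch combining groupChar and decimalChar to parse formatted numbers (following the example above):

from frictionless import Schema, extract, fields
+
+data = [['name'], ['1,000.55']]
+field = fields.NumberField(name='name', group_char=',', decimal_char='.')
+rows = extract(data, schema=Schema(fields=[field]))
+print(rows)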
+ + + + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/fields/object.html b/docs/fields/object.html new file mode 100644 index 0000000000..01aad880ad --- /dev/null +++ b/docs/fields/object.html @@ -0,0 +1,3553 @@ + + + + + + + + +Object Field | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Object Field

+

Overview

+

The field contains data which is valid JSON. Read more in Table Schema Standard.

+

Example

+ +
+
+
from frictionless import Schema, extract, fields
+
+data = [['name'], ['{"key": "value"}']]
+rows = extract(data, schema=Schema(fields=[fields.ObjectField(name='name')]))
+print(rows)
+
+ +
{'memory': [{'name': {'key': 'value'}}]}
+ +
+

Reference

+
+ + +
+
+ +

fields.ObjectField (class)

+ +
+
+ + +
+

fields.ObjectField (class)

+

Field representation

+

Signature

+

(*, name: str, title: Optional[str] = None, description: Optional[str] = None, format: str = default, missing_values: List[str] = NOTHING, constraints: Dict[str, Any] = NOTHING, rdf_type: Optional[str] = None, example: Optional[str] = None, schema: Optional[Schema] = None) -> None

+

Parameters

+
    +
  • + name + (str)
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + format + (str)
  • +
  • + missing_values + (List[str])
  • +
  • + constraints + (Dict[str, Any])
  • +
  • + rdf_type + (Optional[str])
  • +
  • + example + (Optional[str])
  • +
  • + schema + (Optional[Schema])
  • +
+
+ + + + + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/fields/string.html b/docs/fields/string.html new file mode 100644 index 0000000000..1e4815d3f7 --- /dev/null +++ b/docs/fields/string.html @@ -0,0 +1,3561 @@ + + + + + + + + +String Field | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

String Field

+

Overview

+

The field contains strings, that is, sequences of characters. Read more in Table Schema Standard. Currently supported formats: default, email, uri, binary, and uuid.

+ +

Example

+ +
+
+
from frictionless import Schema, extract, fields
+
+data = [['name'], ['value']]
+rows = extract(data, schema=Schema(fields=[fields.StringField(name='name')]))
+print(rows)
+
+ +
{'memory': [{'name': 'value'}]}
+ +
+

Reference

+
+ + +
+
+ +

fields.StringField (class)

+ +
+
+ + +
+

fields.StringField (class)

+

Field representation

+

Signature

+

(*, name: str, title: Optional[str] = None, description: Optional[str] = None, format: str = default, missing_values: List[str] = NOTHING, constraints: Dict[str, Any] = NOTHING, rdf_type: Optional[str] = None, example: Optional[str] = None, schema: Optional[Schema] = None) -> None

+

Parameters

+
    +
  • + name + (str)
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + format + (str)
  • +
  • + missing_values + (List[str])
  • +
  • + constraints + (Dict[str, Any])
  • +
  • + rdf_type + (Optional[str])
  • +
  • + example + (Optional[str])
  • +
  • + schema + (Optional[Schema])
  • +
+
+ + + + + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/fields/time.html b/docs/fields/time.html new file mode 100644 index 0000000000..0151e22e5c --- /dev/null +++ b/docs/fields/time.html @@ -0,0 +1,3553 @@ + + + + + + + + +Time Field | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Time Field

+

Overview

+

A time without a date. Read more in Table Schema Standard.

+

Example

+ +
+
+
from frictionless import Schema, extract, fields
+
+data = [['name'], ['15:00:00']]
+rows = extract(data, schema=Schema(fields=[fields.TimeField(name='name')]))
+print(rows)
+
+ +
{'memory': [{'name': datetime.time(15, 0)}]}
+ +
+

Reference

+
+ + +
+
+ +

fields.TimeField (class)

+ +
+
+ + +
+

fields.TimeField (class)

+

Field representation

+

Signature

+

(*, name: str, title: Optional[str] = None, description: Optional[str] = None, format: str = default, missing_values: List[str] = NOTHING, constraints: Dict[str, Any] = NOTHING, rdf_type: Optional[str] = None, example: Optional[str] = None, schema: Optional[Schema] = None) -> None

+

Parameters

+
    +
  • + name + (str)
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + format + (str)
  • +
  • + missing_values + (List[str])
  • +
  • + constraints + (Dict[str, Any])
  • +
  • + rdf_type + (Optional[str])
  • +
  • + example + (Optional[str])
  • +
  • + schema + (Optional[Schema])
  • +
+
+ + + + + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/fields/year.html b/docs/fields/year.html new file mode 100644 index 0000000000..12798de29a --- /dev/null +++ b/docs/fields/year.html @@ -0,0 +1,3553 @@ + + + + + + + + +Year Field | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Year Field

+

Overview

+

A calendar year as per XMLSchema gYear. Usual lexical representation is YYYY. There are no format options. Read more in Table Schema Standard.

+

Example

+ +
+
+
from frictionless import Schema, extract, fields

data = [['name'], ['2022']]
rows = extract(data, schema=Schema(fields=[fields.YearField(name='name')]))
print(rows)
{'memory': [{'name': 2022}]}

Reference

fields.YearField (class)

Field representation

Signature

(*, name: str, title: Optional[str] = None, description: Optional[str] = None, format: str = default, missing_values: List[str] = NOTHING, constraints: Dict[str, Any] = NOTHING, rdf_type: Optional[str] = None, example: Optional[str] = None, schema: Optional[Schema] = None) -> None

Parameters

  • name (str)
  • title (Optional[str])
  • description (Optional[str])
  • format (str)
  • missing_values (List[str])
  • constraints (Dict[str, Any])
  • rdf_type (Optional[str])
  • example (Optional[str])
  • schema (Optional[Schema])

Yearmonth Field

Overview

A specific month in a specific year, as per XMLSchema gYearMonth. The usual lexical representation is YYYY-MM. Read more in Table Schema Standard.
Example

from frictionless import Schema, extract, fields

data = [['name'], ['2022-08']]
rows = extract(data, schema=Schema(fields=[fields.YearmonthField(name='name')]))
print(rows)
{'memory': [{'name': yearmonth(year=2022, month=8)}]}

Reference

fields.YearmonthField (class)

Field representation

Signature

(*, name: str, title: Optional[str] = None, description: Optional[str] = None, format: str = default, missing_values: List[str] = NOTHING, constraints: Dict[str, Any] = NOTHING, rdf_type: Optional[str] = None, example: Optional[str] = None, schema: Optional[Schema] = None) -> None

Parameters

  • name (str)
  • title (Optional[str])
  • description (Optional[str])
  • format (str)
  • missing_values (List[str])
  • constraints (Dict[str, Any])
  • rdf_type (Optional[str])
  • example (Optional[str])
  • schema (Optional[Schema])

Csv Format

CSV is a file format that you can use in Frictionless for both reading and writing. It is arguably the main Open Data format, so it is supported very well in Frictionless.

Reading Data

You can read this format using Package/Resource, for example:
from pprint import pprint
from frictionless import Resource

resource = Resource('table.csv')
pprint(resource.read_rows())
[{'id': 1, 'name': 'english'}, {'id': 2, 'name': '中国人'}]

Writing Data

The same applies for writing:
from frictionless import Resource

source = Resource(data=[['id', 'name'], [1, 'english'], [2, 'german']])
target = source.write('table-output.csv')
print(target)
print(target.to_view())
{'name': 'table-output',
 'type': 'table',
 'path': 'table-output.csv',
 'scheme': 'file',
 'format': 'csv',
 'mediatype': 'text/csv'}
+----+-----------+
| id | name      |
+====+===========+
|  1 | 'english' |
+----+-----------+
|  2 | 'german'  |
+----+-----------+

Configuration

There is a control to configure how Frictionless reads and writes files in this format. For example:
from frictionless import Resource, formats

resource = Resource(data=[['id', 'name'], [1, 'english'], [2, 'german']])
resource.write('tmp/table.csv', control=formats.CsvControl(delimiter=';'))
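To read the file back, you would pass the same control. A sketch, assuming the 'tmp/table.csv' written above:

from frictionless import Resource, formats

resource = Resource('tmp/table.csv', control=formats.CsvControl(delimiter=';'))
print(resource.read_rows())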

Reference


formats.CsvControl (class)

Csv dialect representation. Control class to set params for the CSV reader/writer.

Signature

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, delimiter: str = ,, line_terminator: str = \r\n, quote_char: str = ", double_quote: bool = True, escape_char: Optional[str] = None, null_sequence: Optional[str] = None, skip_initial_space: bool = False) -> None

Parameters

  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • delimiter (str)
  • line_terminator (str)
  • quote_char (str)
  • double_quote (bool)
  • escape_char (Optional[str])
  • null_sequence (Optional[str])
  • skip_initial_space (bool)

formats.csvControl.delimiter (property)

Specifies the delimiter used to separate text strings while reading from or writing to the CSV file. Default value is ",". For example: delimiter=";"

Signature

str

formats.csvControl.line_terminator (property)

Specifies the line terminator for the CSV file while reading/writing. For example: line_terminator="\n". Default line_terminator is "\r\n".

Signature

str

formats.csvControl.quote_char (property)

Specifies the quote char for fields that contain a special character such as a comma, CR, LF or double quote. Default value is '"'. For example: quote_char='|'

Signature

str

formats.csvControl.double_quote (property)

Controls how instances of 'quote_char' appearing inside a field should themselves be quoted. When set to True, the 'quote_char' is doubled; otherwise the escape char is used. Default value is True.

Signature

bool

formats.csvControl.escape_char (property)

A one-character string used by the CSV writer to escape. Default is None, which disables escaping. It uses 'quote_char' if double_quote is False.

Signature

Optional[str]

formats.csvControl.null_sequence (property)

Specifies the null sequence; not set by default. For example: \\N

Signature

Optional[str]

formats.csvControl.skip_initial_space (property)

Ignores spaces following the comma if set to True. For example, a space in a header (in the CSV file): "Name", "Team"

Signature

bool

formats.csvControl.to_python (method)

Convert to Python's `csv.Dialect`

Erd Format

Frictionless supports exporting a data package as an ER-diagram dot file. For example:

from frictionless import Package

package = Package('datapackage.zip')
package.to_er_diagram(path='erd.dot')

Excel Format

Excel is a very popular tabular data format that usually has xlsx (newer) and xls (older) file extensions. Frictionless supports Excel files extensively.

pip install frictionless[excel]
pip install 'frictionless[excel]' # for zsh shell

Reading Data

You can read this format using Package/Resource, for example:

from pprint import pprint
from frictionless import Resource

resource = Resource(path='table.xlsx')
pprint(resource.read_rows())
[{'id': 1, 'name': 'english'}, {'id': 2, 'name': '中国人'}]

Writing Data

The same applies for writing:

from frictionless import Resource

source = Resource(data=[['id', 'name'], [1, 'english'], [2, 'german']])
target = source.write('table-output.xlsx')
print(target)
print(target.to_view())

Configuration

There is a dialect to configure how Frictionless reads and writes files in this format. For example:

from frictionless import Resource, formats

resource = Resource(data=[['id', 'name'], [1, 'english'], [2, 'german']])
resource.write('table-output-sheet.xls', control=formats.ExcelControl(sheet='My Table'))
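Reading controls work the same way. A sketch using the fill_merged_cells option documented below (the 'table-merged.xlsx' file is hypothetical):

from frictionless import Resource, formats

control = formats.ExcelControl(fill_merged_cells=True)
resource = Resource('table-merged.xlsx', control=control)
print(resource.read_rows())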

Reference


formats.ExcelControl (class)

Excel control representation. Control class to set params for the Excel reader/writer.

Signature

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, sheet: Union[str, int] = 1, workbook_cache: Optional[Any] = None, fill_merged_cells: bool = False, preserve_formatting: bool = False, adjust_floating_point_error: bool = False, stringified: bool = False) -> None

Parameters

  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • sheet (Union[str, int])
  • workbook_cache (Optional[Any])
  • fill_merged_cells (bool)
  • preserve_formatting (bool)
  • adjust_floating_point_error (bool)
  • stringified (bool)

formats.excelControl.sheet (property)

Name of the sheet from which to read or to which to write data.

Signature

Union[str, int]

formats.excelControl.workbook_cache (property)

An empty dictionary used to handle workbook caching for remote workbooks. It stores the path to the temporary file while reading remote workbooks.

Signature

Optional[Any]

formats.excelControl.fill_merged_cells (property)

If True, it will unmerge all merged cells and fill them with the visible value. Default value is False.

Signature

bool

formats.excelControl.preserve_formatting (property)

If set to True, it preserves text formatting for numeric and temporal cells. If not set, it returns all cell values as strings. Default value is False.

Signature

bool

formats.excelControl.adjust_floating_point_error (property)

If True, it corrects the Excel behavior regarding floating point numbers. For example: 274.65999999999997 -> 274.66 (when True).

Signature

bool

formats.excelControl.stringified (property)

Stringifies all the cell values. Default value is False.

Signature

bool

Gsheets Format

Frictionless supports parsing Google Sheets data as a file format.

pip install frictionless[gsheets]
pip install 'frictionless[gsheets]' # for zsh shell

Reading Data

You can read from Google Sheets using Package/Resource, for example:

from pprint import pprint
from frictionless import Resource

path='https://docs.google.com/spreadsheets/d/1mHIWnDvW9cALRMq9OdNfRwjAthCUFUOACPp0Lkyl7b4/edit?usp=sharing'
resource = Resource(path=path)
pprint(resource.read_rows())
[{'id': 1, 'name': 'english'}, {'id': 2, 'name': '中国人'}]

Writing Data

The same applies for writing:

from frictionless import Resource, formats

control = formats.GsheetsControl(credentials=".google.json")
resource = Resource(path='data/table.csv')
resource.write("https://docs.google.com/spreadsheets/d/<id>/edit", control=control)

Configuration

There is a dialect to configure how Frictionless reads and writes files in this format. For example:

from frictionless import Resource, formats

control = formats.GsheetsControl(credentials=".google.json")
resource = Resource(path='data/table.csv')
resource.write("https://docs.google.com/spreadsheets/d/<id>/edit", control=control)

Reference


formats.GsheetsControl (class)

Gsheets control representation. Control class to set params for the Gsheets API.

Signature

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, credentials: Optional[str] = None) -> None

Parameters

  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • credentials (Optional[str])

formats.gsheetsControl.credentials (property)

API key to access Google Sheets.

Signature

Optional[str]

Html Format

Frictionless supports parsing the HTML format:

pip install frictionless[html]
pip install 'frictionless[html]' # for zsh shell

Reading Data

You can read this file format using Package/Resource, for example:

from pprint import pprint
from frictionless import resources

resource = resources.TableResource(path='table1.html')
pprint(resource.read_rows())
[{'id': 1, 'name': 'english'}, {'id': 2, 'name': '中国人'}]

Writing Data

The same applies for writing:

from frictionless import Resource, resources

source = Resource(data=[['id', 'name'], [1, 'english'], [2, 'german']])
target = resources.TableResource(path='table-output.html')
source.write(target)
print(target)
print(target.to_view())
{'name': 'table-output',
 'type': 'table',
 'path': 'table-output.html',
 'scheme': 'file',
 'format': 'html',
 'mediatype': 'text/html'}
+----+-----------+
| id | name      |
+====+===========+
|  1 | 'english' |
+----+-----------+
|  2 | 'german'  |
+----+-----------+

Configuration

There is a dialect to configure HTML, for example:

from frictionless import Resource, resources, formats

control = formats.HtmlControl(selector='#id')
resource = resources.TableResource(path='table1.html', control=control)
print(resource.read_rows())
[]

Reference


formats.HtmlControl (class)

Html control representation. Control class to set params for the HTML reader/writer.

Signature

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, selector: str = table) -> None

Parameters

  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • selector (str)

formats.htmlControl.selector (property)

Any valid CSS selector. Default selector is 'table'. For example: "table", "#id", ".meme" etc.

Signature

str

Inline Format

Frictionless supports working with Inline Data from memory.

Reading Data

You can read data in this format using Package/Resource, for example:

from pprint import pprint
from frictionless import Resource

resource = Resource(data=[['id', 'name'], [1, 'english'], [2, 'german']])
pprint(resource.read_rows())
[{'id': 1, 'name': 'english'}, {'id': 2, 'name': 'german'}]

Writing Data

The same applies for writing:

from frictionless import Resource

source = Resource('table.csv')
target = source.write(format='inline', datatype='table')
print(target)
print(target.to_view())
{'name': 'memory',
 'type': 'table',
 'data': [['id', 'name'], [1, 'english'], [2, '中国人']],
 'format': 'inline'}
+----+-----------+
| id | name      |
+====+===========+
|  1 | 'english' |
+----+-----------+
|  2 | '中国人'     |
+----+-----------+

Configuration

There is a dialect to configure this format, for example:

from frictionless import Resource, formats

control = formats.InlineControl(keyed=True, keys=['name', 'id'])
resource = Resource(data=[{'id': 1, 'name': 'english'}, {'id': 2, 'name': 'german'}], control=control)
print(resource.to_view())
+-----------+----+
| name      | id |
+===========+====+
| 'english' |  1 |
+-----------+----+
| 'german'  |  2 |
+-----------+----+

Reference


formats.InlineControl (class)

Inline control representation. Control class to set params for the Inline reader/writer.

Signature

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, keys: Optional[List[str]] = None, keyed: bool = False) -> None

Parameters

  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • keys (Optional[List[str]])
  • keyed (bool)

formats.inlineControl.keys (property)

Specifies the keys/columns to read from the resource. For example: keys=["id","name"].

Signature

Optional[List[str]]

formats.inlineControl.keyed (property)

If set to True, it returns the data as key:value pairs.

Signature

bool

Json Format

Frictionless supports parsing JSON tables (JSON and JSONL/NDJSON).

pip install frictionless[json]
pip install 'frictionless[json]' # for zsh shell

Reading Data

Note: we use the path argument to ensure that the file will not be guessed to be a metadata file.

You can read this format using Package/Resource, for example:

from pprint import pprint
from frictionless import resources

resource = resources.TableResource(path='table.json')
pprint(resource.read_rows())
[{'id': 1, 'name': 'english'}, {'id': 2, 'name': '中国人'}]

Writing Data

The same applies for writing:

from frictionless import Resource, resources

source = Resource(data=[['id', 'name'], [1, 'english'], [2, 'german']])
target = resources.TableResource(path='table-output.json')
source.write(target)
print(target)
print(target.to_view())
{'name': 'table-output',
 'type': 'table',
 'path': 'table-output.json',
 'scheme': 'file',
 'format': 'json',
 'mediatype': 'text/json'}
+----+-----------+
| id | name      |
+====+===========+
|  1 | 'english' |
+----+-----------+
|  2 | 'german'  |
+----+-----------+

Configuration

There is a dialect to configure how Frictionless reads and writes files in this format. For example:

from pprint import pprint
from frictionless import Resource, resources, formats

control = formats.JsonControl(keyed=True)
resource = resources.TableResource(path='table.keyed.json', control=control)
pprint(resource.read_rows())
[{'id': 1, 'name': 'english'}, {'id': 2, 'name': '中国人'}]
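The property option can address nested data. A sketch, assuming a hypothetical file 'table.nested.json' shaped like {"rows": [{"id": 1, "name": "english"}]}:

from frictionless import resources, formats

control = formats.JsonControl(property='rows')
resource = resources.TableResource(path='table.nested.json', control=control)
print(resource.read_rows())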

Reference


formats.JsonControl (class)

Json control representation. Control class to set params for the JSON reader/writer.

Signature

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, keys: Optional[List[str]] = None, keyed: bool = False, property: Optional[str] = None) -> None

Parameters

  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • keys (Optional[List[str]])
  • keyed (bool)
  • property (Optional[str])

formats.jsonControl.keys (property)

Specifies the keys/columns to read from the resource. For example: keys=["id","name"].

Signature

Optional[List[str]]

formats.jsonControl.keyed (property)

If set to True, it returns the data as key:value pairs. Default value is False.

Signature

bool

formats.jsonControl.property (property)

Specifies the path to the attribute in a JSON file, if it has nested fields.

Signature

Optional[str]

JsonSchema Format

Frictionless supports importing a JsonSchema profile as a Table Schema. For example:

from frictionless import Schema

schema = Schema.from_jsonschema('table.jsonschema')

Markdown Format

Frictionless supports exporting a metadata object as a Markdown document. For example:

from frictionless import Schema

schema = Schema('schema.json')
schema.to_markdown('schema.md')

Ods Format

Frictionless supports ODS parsing.

pip install frictionless[ods]
pip install 'frictionless[ods]' # for zsh shell

Reading Data

You can read this format using Package/Resource, for example:

from pprint import pprint
from frictionless import Resource

resource = Resource(path='table.ods')
pprint(resource.read_rows())
[{'id': 1, 'name': 'english'}, {'id': 2, 'name': '中国人'}]

Writing Data

The same applies for writing:

from pprint import pprint
from frictionless import Resource

source = Resource(data=[['id', 'name'], [1, 'english'], [2, 'german']])
target = source.write('table-output.ods')
pprint(target)

Configuration

There is a dialect to configure how Frictionless reads and writes files in this format. For example:

from frictionless import Resource, formats

resource = Resource(data=[['id', 'name'], [1, 'english'], [2, 'german']])
resource.write('table-output-sheet.ods', control=formats.OdsControl(sheet='My Table'))

Reference


formats.OdsControl (class)

Ods control representation. Control class to set params for the ODS reader/writer.

Signature

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, sheet: Union[str, int] = 1) -> None

Parameters

  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • sheet (Union[str, int])

formats.odsControl.sheet (property)

Name or index of the sheet to read/write.

Signature

Union[str, int]

Pandas Format

Frictionless supports reading and writing Pandas dataframes.

pip install frictionless[pandas]
pip install 'frictionless[pandas]' # for zsh shell

Reading Data

You can read a Pandas dataframe:

from pprint import pprint
from frictionless import Resource

# here `df` is an existing pandas dataframe
resource = Resource(df)
pprint(resource.read_rows())

Writing Data

You can write a dataset to Pandas:

from frictionless import Resource

resource = Resource('table.csv')
df = resource.to_pandas()

Parquet Format

Frictionless supports reading and writing Parquet files.

pip install frictionless[parquet]
pip install 'frictionless[parquet]' # for zsh shell

Reading Data

You can read a Parquet file:

from frictionless import Resource

resource = Resource('table.parq')
print(resource.read_rows())
[{'id': 1, 'name': 'english'}, {'id': 2, 'name': '中国人'}]

Writing Data

You can write a dataset to Parquet:

from frictionless import Resource

resource = Resource('table.csv')
target = resource.write('table-output.parq')
print(target)
print(target.read_rows())
{'name': 'table-output',
 'type': 'table',
 'path': 'table-output.parq',
 'scheme': 'file',
 'format': 'parq',
 'mediatype': 'appliction/parquet'}
[{'id': 1, 'name': 'english'}, {'id': 2, 'name': '中国人'}]
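A minimal sketch of ParquetControl (documented below), assuming the 'table-output.parq' file from the example above; exact read behavior depends on the underlying engine:

from frictionless import Resource, formats

control = formats.ParquetControl(columns=['id'])
resource = Resource('table-output.parq', control=control)
print(resource.read_rows())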

Reference


formats.ParquetControl (class)

Parquet control representation. Control class to set params for the Parquet reader/writer.

Signature

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, columns: Optional[List[str]] = None, categories: Optional[Any] = None, filters: Optional[Any] = False) -> None

Parameters

  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • columns (Optional[List[str]])
  • categories (Optional[Any])
  • filters (Optional[Any])

formats.parquetControl.columns (property)

A list of columns to load. By selecting columns, we can access only the parts of the file that we are interested in and skip columns that are not of interest. Default value is None.

Signature

Optional[List[str]]

formats.parquetControl.categories (property)

List of columns that should be returned as Pandas Category-type columns. The second example specifies the number of expected labels for that column. For example: categories=['col1'] or categories={'col1': 12}

Signature

Optional[Any]

formats.parquetControl.filters (property)

Specifies the condition to filter data (row-groups). For example: [('col3', 'in', [1, 2, 3, 4])]

Signature

Optional[Any]

formats.parquetControl.to_python (method)

Convert to options

Spss Format

Frictionless supports reading and writing SPSS files.

pip install frictionless[spss]
pip install 'frictionless[spss]' # for zsh shell

Reading Data

You can read SPSS files:

from pprint import pprint
from frictionless import Resource

resource = Resource('table.sav')
pprint(resource.read_rows())

Writing Data

You can write SPSS files:

from pprint import pprint
from frictionless import Resource

source = Resource(data=[['id', 'name'], [1, 'english'], [2, 'german']])
target = source.write('table-output.sav')
pprint(target)
pprint(target.read_rows())

Sql Format

Frictionless supports reading and writing SQL databases.

Supported Databases

In general, Frictionless Framework supports many databases that can be used with sqlalchemy. Here is a list of the databases with tested support:

SQLite

https://www.sqlite.org/index.html

It's a well-tested default database used by Frictionless:

pip install frictionless[sql]

PostgreSQL

https://www.postgresql.org/

This database is well-tested and provides the most data types:

pip install frictionless[postgresql]

MySQL

https://www.mysql.com/

Another popular database that has been tested with Frictionless:

pip install frictionless[mysql]

DuckDB

https://duckdb.org/

DuckDB is a relatively new database and, currently, Frictionless support is experimental:

pip install frictionless[duckdb]

Reading Data

You can read a SQL database:

from frictionless import Resource, formats

control = formats.SqlControl(table="test_table", basepath='data')
with Resource(path="sqlite:///sqlite.db", control=control) as resource:
    print(resource.read_rows())

Writing Data

You can write to SQL databases:

from frictionless import Package

package = Package('path/to/datapackage.json')
package.publish('postgresql://database')

Configuration

There is a dialect to configure how Frictionless reads and writes data in this format. For example:

from frictionless import Resource, formats

control = formats.SqlControl(table='table', order_by='field', where='field > 20')
resource = Resource('postgresql://database', control=control)
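A sketch of writing a single resource into a specific table (the table name and SQLite path are illustrative):

from frictionless import Resource, formats

control = formats.SqlControl(table='output_table')
resource = Resource('table.csv')
resource.write('sqlite:///sqlite.db', control=control)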

Reference


formats.SqlControl (class)

SQL control representation. Control class to set params for the SQL reader/writer.

Signature

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, table: Optional[str] = None, order_by: Optional[str] = None, where: Optional[str] = None, namespace: Optional[str] = None, basepath: Optional[str] = None, with_metadata: bool = False) -> None

Parameters

  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • table (Optional[str])
  • order_by (Optional[str])
  • where (Optional[str])
  • namespace (Optional[str])
  • basepath (Optional[str])
  • with_metadata (bool)

formats.sqlControl.table (property)

Table name from which to read the data.

Signature

Optional[str]

formats.sqlControl.order_by (property)

Specifies the ORDER BY keyword for SQL queries to sort the results that are being read. The default value is None.

Signature

Optional[str]

formats.sqlControl.where (property)

Specifies the WHERE clause to filter the records in SQL queries. The default value is None.

Signature

Optional[str]

formats.sqlControl.namespace (property)

To refer to a table using a schema, namespace or database such as `FOO`.`TABLEFOO1`, we can specify a namespace. For example: control = formats.SqlControl(table="test_table", namespace="FOO")

Signature

Optional[str]

formats.sqlControl.basepath (property)

Specifies the base path for the database. The basepath will be appended to the db path. The default value is None. For example: formats.SqlControl(table="test_table", basepath="data")

Signature

Optional[str]

formats.sqlControl.with_metadata (property)

Indicates whether a table contains metadata columns like _rowNumber or _rowValid.

Signature

bool

Yaml Format

Frictionless supports parsing YAML tables.

Reading Data

Note: we use the path argument to ensure that the file will not be guessed to be a metadata file.

You can read this format using Package/Resource, for example:

from pprint import pprint
from frictionless import Resource, resources

resource = resources.TableResource(path='table.yaml')
pprint(resource.read_rows())
[{'id': 1, 'name': 'english'}, {'id': 2, 'name': '中国人'}]

Writing Data

The same applies for writing:

from frictionless import Resource, resources

source = Resource(data=[['id', 'name'], [1, 'english'], [2, 'german']])
target = resources.TableResource(path='table-output.yaml')
source.write(target)
print(target)
print(target.to_view())
{'name': 'table-output',
 'type': 'table',
 'path': 'table-output.yaml',
 'scheme': 'file',
 'format': 'yaml',
 'mediatype': 'text/yaml'}
+----+-----------+
| id | name      |
+====+===========+
|  1 | 'english' |
+----+-----------+
|  2 | 'german'  |
+----+-----------+

Configuration

There is a dialect to configure how Frictionless reads and writes files in this format. For example:

from pprint import pprint
from frictionless import Resource, resources, formats

control = formats.YamlControl(keyed=True)
resource = resources.TableResource(path='table.keyed.yaml', control=control)
pprint(resource.read_rows())
[{'id': 1, 'name': 'english'}, {'id': 2, 'name': '中国人'}]

Reference


formats.YamlControl (class)

Yaml control representation.

Signature

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, keys: Optional[List[str]] = None, keyed: bool = False, property: Optional[str] = None) -> None

Parameters

  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • keys (Optional[List[str]])
  • keyed (bool)
  • property (Optional[str])

formats.yamlControl.keys (property)

Specifies the keys/columns to read from the resource. For example: keys=["id","name"].

Signature

Optional[List[str]]

formats.yamlControl.keyed (property)

If set to True, it returns the data as key:value pairs. Default value is False.

Signature

bool

formats.yamlControl.property (property)

Specifies the path to the attribute in a YAML file, if it has nested fields.

Signature

Optional[str]

Zip Format

Frictionless supports zipped resources and reading/publishing data packages as a zip archive. For example:

from frictionless import Package

package = Package('datapackage.zip')
package.publish('otherpackage.zip')

Data Actions

Describe

Describe is a high-level function (action) to infer metadata from a data source.

Example

from frictionless import describe

resource = describe('table.csv')
print(resource)
{'name': 'table',
 'type': 'table',
 'path': 'table.csv',
 'scheme': 'file',
 'format': 'csv',
 'mediatype': 'text/csv',
 'encoding': 'utf-8',
 'schema': {'fields': [{'name': 'id', 'type': 'integer'},
                       {'name': 'name', 'type': 'string'}]}}
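A sketch of the type option documented below (the accepted values follow the reference):

from frictionless import describe

schema = describe('table.csv', type='schema')
print(schema)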

Reference


describe (function)

Describe the data source

Signature

(source: Optional[Any] = None, *, name: Optional[str] = None, type: Optional[str] = None, stats: bool = False, **options: Any) -> Metadata

Parameters

  • source (Optional[Any]): data source
  • name (Optional[str]): resource name
  • type (Optional[str]): data type: "package", "resource", "dialect", or "schema"
  • stats (bool): if `True` infer resource's stats
  • options (Any)

Extract

Extract is a high-level function (action) to read tabular data from a data source. The output is encoded using the 'utf-8' scheme.

Example

from pprint import pprint
from frictionless import extract

rows = extract('table.csv')
pprint(rows)
{'table': [{'id': 1, 'name': 'english'}, {'id': 2, 'name': '中国人'}]}
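A sketch of the filter option documented below (using the same 'table.csv'):

from frictionless import extract

rows = extract('table.csv', filter=lambda row: row['id'] > 1)
print(rows)  # expected: {'table': [{'id': 2, 'name': '中国人'}]}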

Reference


extract (function)

Extract rows

Signature

(source: Optional[Any] = None, *, name: Optional[str] = None, type: Optional[str] = None, filter: Optional[types.IFilterFunction] = None, process: Optional[types.IProcessFunction] = None, limit_rows: Optional[int] = None, resource_name: Optional[str] = None, **options: Any)

Parameters

  • source (Optional[Any])
  • name (Optional[str]): extract only the resource having this name
  • type (Optional[str])
  • filter (Optional[types.IFilterFunction]): row filter function
  • process (Optional[types.IProcessFunction]): row processor function
  • limit_rows (Optional[int]): limit the amount of rows to this number
  • resource_name (Optional[str])
  • options (Any)

Validate

Validate is a high-level function (action) to validate data from a data source.

Example

from frictionless import validate

report = validate('table.csv')
print(report.valid)
True
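A sketch of narrowing the reported errors using the options documented below:

from frictionless import validate

report = validate('table.csv', pick_errors=['blank-row'], limit_errors=10)
print(report.valid)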

Reference


validate (function)

Validate resource

Signature

(source: Optional[Any] = None, *, name: Optional[str] = None, type: Optional[str] = None, checklist: Union[frictionless.checklist.checklist.Checklist, str, NoneType] = None, checks: List[frictionless.checklist.check.Check] = [], pick_errors: List[str] = [], skip_errors: List[str] = [], limit_errors: int = 1000, limit_rows: Optional[int] = None, parallel: bool = False, resource_name: Optional[str] = None, **options: Any)

Parameters

  • source (Optional[Any]): a data source
  • name (Optional[str])
  • type (Optional[str]): source type - inquiry, package, resource, schema or table
  • checklist (Union[Checklist, str, NoneType])
  • checks (List[Check])
  • pick_errors (List[str])
  • skip_errors (List[str])
  • limit_errors (int)
  • limit_rows (Optional[int])
  • parallel (bool)
  • resource_name (Optional[str])
  • options (Any)

Transform

Transform is a high-level function (action) to transform tabular data from a data source.

Example

from frictionless import transform, steps

resource = transform('table.csv', steps=[steps.cell_set(field_name='name', value='new')])
print(resource.read_rows())
[{'id': 1, 'name': 'new'}, {'id': 2, 'name': 'new'}]

Reference


transform (function)

Transform resource

Signature

(source: Optional[Any] = None, *, type: Optional[str] = None, pipeline: Union[frictionless.pipeline.pipeline.Pipeline, str, NoneType] = None, steps: Optional[List[frictionless.pipeline.step.Step]] = None, **options: Any)

Parameters

  • source (Optional[Any]): data source
  • type (Optional[str]): data type - package, resource or pipeline (default: infer)
  • pipeline (Union[Pipeline, str, NoneType])
  • steps (Optional[List[Step]]): transform steps
  • options (Any)

Catalog Class

Catalog is a set of data packages.

Creating Catalog

We can create a catalog by providing a list of datasets:

from frictionless import Catalog, Dataset, Package

catalog = Catalog(datasets=[Dataset(name='name', package=Package('tables/*'))])

Describing Catalog

Usually, a Catalog is used to describe some external set of datasets, like a CKAN instance or a GitHub user or search. For example:

from frictionless import Catalog

catalog = Catalog('https://demo.ckan.org/dataset/')
print(catalog)

Dataset Management

The core purpose of a catalog is to provide the ability to manage a set of datasets. The Catalog class provides useful methods for this, as sketched below:

from frictionless import Catalog

catalog = Catalog('https://demo.ckan.org/dataset/')
catalog.dataset_names
catalog.has_dataset
catalog.add_dataset
catalog.get_dataset
catalog.clear_datasets
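For instance, a sketch of these methods in use (the dataset name is illustrative):

from frictionless import Catalog

catalog = Catalog('https://demo.ckan.org/dataset/')
if catalog.has_dataset('name'):
    dataset = catalog.get_dataset('name')
    print(dataset.name)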

Saving Descriptor

Like any of the Metadata classes, the Catalog class can be saved as JSON or YAML:

from frictionless import Catalog

catalog = Catalog('https://demo.ckan.org/dataset/')
catalog.to_json('datacatalog.json') # Save as JSON
catalog.to_yaml('datacatalog.yaml') # Save as YAML

Reference


Catalog (class)

Catalog representation

Signature

(*, source: Optional[Any] = None, control: Optional[Control] = None, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, datasets: List[Dataset] = NOTHING, basepath: Optional[str] = None) -> None

Parameters

  • source (Optional[Any])
  • control (Optional[Control])
  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • datasets (List[Dataset])
  • basepath (Optional[str])

catalog.source (property)

# TODO: add docs

Signature

Optional[Any]

catalog.control (property)

# TODO: add docs

Signature

Optional[Control]

catalog.name (property)

A short url-usable (and preferably human-readable) name. This MUST be lower-case and contain only alphanumeric characters along with ".", "_" or "-" characters.

Signature

Optional[str]

catalog.type (property)

Type of the object

Signature

ClassVar[Union[str, None]]

catalog.title (property)

A Catalog title according to the specs. It should be a human-oriented title of the resource.

Signature

Optional[str]

catalog.description (property)

A Catalog description according to the specs. It should be a human-oriented description of the resource.

Signature

Optional[str]

catalog.datasets (property)

A list of datasets. Each item in the list is a Dataset.

Signature

List[Dataset]

catalog.basepath (property)

A basepath of the catalog. The normpath of the resource is the join of `basepath` and `/path`.

Signature

Optional[str]

catalog.dataset_names (property)

Return names of datasets

Signature

List[str]

catalog.add_dataset (method)

Add a new dataset to the catalog

Signature

(dataset: Union[Dataset, str]) -> Dataset

Parameters

  • dataset (Union[Dataset, str])

catalog.clear_datasets (method)

Remove all the datasets

catalog.dereference (method)

Dereference the underlying metadata. If some of the underlying metadata is provided as a string, it will be replaced by the metadata object.

catalog.get_dataset (method)

Get dataset by name

Signature

(name: str) -> Dataset

Parameters

  • name (str)

catalog.has_dataset (method)

Check if a dataset is present

Signature

(name: str) -> bool

Parameters

  • name (str)

catalog.infer (method)

Infer catalog's metadata

Signature

(*, stats: bool = False)

Parameters

  • stats (bool)

catalog.remove_dataset (method)

Remove dataset by name

Signature

(name: str) -> Dataset

Parameters

  • name (str)

catalog.set_dataset (method)

Set dataset by name

Signature

(dataset: Dataset) -> Optional[Dataset]

Parameters

  • dataset (Dataset)

catalog.to_copy (method)

Create a copy of the catalog

Signature

(**options: Any)

Parameters

  • options (Any)

Dataset (class)

Dataset representation.

Signature

(*, name: str, title: Optional[str] = None, description: Optional[str] = None, package: Union[Package, str], basepath: Optional[str] = None, catalog: Optional[Catalog] = None) -> None

Parameters

  • name (str)
  • title (Optional[str])
  • description (Optional[str])
  • package (Union[Package, str])
  • basepath (Optional[str])
  • catalog (Optional[Catalog])

dataset.name (property)

A short url-usable (and preferably human-readable) name. This MUST be lower-case and contain only alphanumeric characters along with ".", "_" or "-" characters.

Signature

str

dataset.type (property)

A short name (preferably human-readable) for the type. This MUST be lower-case and contain only alphanumeric characters along with "-" or "_".

Signature

ClassVar[str]

dataset.title (property)

A human-readable title for the Dataset.

Signature

Optional[str]

dataset.description (property)

A detailed description for the Dataset.

Signature

Optional[str]

dataset._package (property)

# TODO: add docs

Signature

Union[Package, str]

dataset._basepath (property)

# TODO: add docs

Signature

Optional[str]

dataset.catalog (property)

# TODO: add docs

Signature

Optional[Catalog]

dataset.dereference (method)

Dereference the underlying metadata. If some of the underlying metadata is provided as a string, it will be replaced by the metadata object.

dataset.infer (method)

Infer dataset's metadata

Signature

(*, stats: bool = False)

Parameters

  • stats (bool)

Checklist Class

Creating Checklist

Checklist is a set of validation checks and a few additional settings. Let's create a checklist:

from frictionless import Checklist, checks

checklist = Checklist(checks=[checks.row_constraint(formula='id > 1')])
print(checklist)
{'checks': [{'type': 'row-constraint', 'formula': 'id > 1'}]}
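A sketch of applying this checklist during validation (assuming the 'table.csv' used elsewhere in these docs):

from frictionless import Checklist, checks, validate

checklist = Checklist(checks=[checks.row_constraint(formula='id > 1')])
report = validate('table.csv', checklist=checklist)
print(report.valid)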

Validation Checks

The Check concept is a part of the Validation API. You can create a custom Check to be used as part of resource or package validation.

import hashlib
from frictionless import Check, errors

class duplicate_row(Check):
    code = "duplicate-row"
    Errors = [errors.DuplicateRowError]

    def __init__(self, descriptor=None):
        super().__init__(descriptor)
        self.__memory = {}

    def validate_row(self, row):
        # hash each row's values and remember the position of the first occurrence
        text = ",".join(map(str, row.values()))
        hash = hashlib.sha256(text.encode("utf-8")).hexdigest()
        match = self.__memory.get(hash)
        if match:
            note = 'the same as row at position "%s"' % match
            yield errors.DuplicateRowError.from_row(row, note=note)
        self.__memory[hash] = row.row_position

    # Metadata

    metadata_profile = {  # type: ignore
        "type": "object",
        "properties": {},
    }

It's usual to create a custom Error alongside a custom Check.

Reference

+
+ + +
+
+ +

Checklist (class)

+

Check (class)

+ +
+
+ + +
+

Checklist (class)

+

Checklist representation. + +A class that combines multiple checks to be applied while validating +a resource or package.

+

Signature

+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, checks: List[Check] = NOTHING, pick_errors: List[str] = NOTHING, skip_errors: List[str] = NOTHING) -> None

+

Parameters

+
    +
  • + name + (Optional[str])
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + checks + (List[Check])
  • +
  • + pick_errors + (List[str])
  • +
  • + skip_errors + (List[str])
  • +
+
+ +
+

checklist.name (property)

+

+ A short name(preferably human-readable) for the Checklist. + This MUST be lower-case and contain only alphanumeric characters + along with "-" or "_". +

+

Signature

+

Optional[str]

+
+
+

checklist.type (property)

+

+ Type of the object +

+

Signature

+

ClassVar[Union[str, None]]

+
+
+

checklist.title (property)

+

+ A human-readable title for the Checklist. +

+

Signature

+

Optional[str]

+
+
+

checklist.description (property)

+

+ A detailed description for the Checklist. +

+

Signature

+

Optional[str]

+
+
+

checklist.checks (property)

+

+ List of checks to be applied during validation such as "deviated-cell", + "required-value" etc. +

+

Signature

+

List[Check]

+
+
+

checklist.pick_errors (property)

+

+ Specify the errors names to be picked while validation such as "sha256-count", + "byte-count". Errors other than specified will be ignored. +

+

Signature

+

List[str]

+
+
+

checklist.skip_errors (property)

+

+ Specify the errors names to be skipped while validation such as "sha256-count", + "byte-count". Other errors will be included. +

+

Signature

+

List[str]

+
+ + +
+

checklist.add_check (method)

+

Add new check to the schema

+

Signature

+

(check: Check) -> None

+

Parameters

+
    +
  • + check + (Check)
  • +
+
+
+

checklist.clear_checks (method)

+

Remove all the checks

+

Signature

+

() -> None

+
+
+

checklist.get_check (method)

+

Get check by type

+

Signature

+

(type: str) -> Check

+

Parameters

+
    +
  • + type + (str)
  • +
+
+
+

checklist.has_check (method)

+

Check if a check is present

+

Signature

+

(type: str) -> bool

+

Parameters

+
    +
  • + type + (str)
  • +
+
+
+

checklist.remove_check (method)

+

Remove check by type

+

Signature

+

(type: str) -> Check

+

Parameters

+
    +
  • + type + (str)
  • +
+
+
+

checklist.set_check (method)

+

Set check by type

+

Signature

+

(check: Check) -> Optional[Check]

+

Parameters

+
    +
  • + check + (Check)
  • +
+
+ + +
+

Check (class)

+

Check representation. + +A base class for all the checks. To add a new custom check, it has to be derived +from this class.

+

Signature

+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None) -> None

+

Parameters

+
    +
  • + name + (Optional[str])
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
+
+ +
+

check.name (property)

+

+ A short url-usable (and preferably human-readable) name. + This MUST be lower-case and contain only alphanumeric characters + along with “.”, “_” or “-” characters. +

+

Signature

+

Optional[str]

+
+
+

check.type (property)

+

+ A short name (preferably human-readable) for the Check. This MUST be lower-case and contain only alphanumeric characters along with "-" or "_".

+

Signature

+

ClassVar[str]

+
+
+

check.title (property)

+

+ A human-readable title for the Check. +

+

Signature

+

Optional[str]

+
+
+

check.description (property)

+

+ A detailed description for the Check. +

+

Signature

+

Optional[str]

+
+
+

check.Errors (property)

+

+ List of errors that are being used in the Check. +

+

Signature

+

ClassVar[List[Type[Error]]]

+
+ +
+

check.resource (property)

+

+

Signature

+

Resource

+
+ +
+

check.connect (method)

+

Connect to the given resource

+

Signature

+

(resource: Resource)

+

Parameters

+
    +
  • + resource + (Resource): data resource
  • +
+
+
+

check.validate_end (method)

+

Called to validate the resource before closing

+

Signature

+

() -> Iterable[Error]

+
+
+

check.validate_row (method)

+

Called to validate the given row (on every row)

+

Signature

+

(row: Row) -> Iterable[Error]

+

Parameters

+
    +
  • + row + (Row): table row
  • +
+
+
+

check.validate_start (method)

+

Called to validate the resource after opening

+

Signature

+

() -> Iterable[Error]

+
\ No newline at end of file
diff --git a/docs/framework/detector.html b/docs/framework/detector.html
new file mode 100644
index 0000000000..b7e8587406
--- /dev/null
+++ b/docs/framework/detector.html

Detector Class

+

The Detector object can be used in various places within the Framework. The main purpose of this class is to tweak how different aspects of metadata are detected.

+

Here is a quick example:

+ +
+
+
frictionless extract table.csv --field-missing-values 1,2
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+           dataset
+┏━━━━━━━┳━━━━━━━┳━━━━━━━━━━━┓
+┃ name  ┃ type  ┃ path      ┃
+┡━━━━━━━╇━━━━━━━╇━━━━━━━━━━━┩
+│ table │ table │ table.csv │
+└───────┴───────┴───────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+      table
+┏━━━━━━┳━━━━━━━━━┓
+┃ id   ┃ name    ┃
+┡━━━━━━╇━━━━━━━━━┩
+│ None │ english │
+│ None │ 中国人  │
+└──────┴─────────┘
+ +
+
+
from frictionless import Detector, Resource
+
+detector = Detector(field_missing_values=['1', '2'])
+resource = Resource('table.csv', detector=detector)
+print(resource.read_rows())
+
+ +
[{'id': None, 'name': 'english'}, {'id': None, 'name': '中国人'}]
+ +
+

Many of the options below have CLI equivalents; please consult the CLI help.

+

Detector Usage

+

Detector class instances are accepted by many classes and functions:

+ +

You just need to create a Detector instance with the desired options and pass it to the classes and functions listed above, as shown below.

+

Buffer Size

+

By default, Frictionless uses the first 10000 bytes to detect encoding. Increasing buffer_size includes more bytes in the detection, which makes the inference more accurate but slower.

+ +
+
+
from frictionless import Detector, describe
+
+detector = Detector(buffer_size=100000)
+resource = describe("country-1.csv", detector=detector)
+print(resource.encoding)
+
+ +
utf-8
+ +
+

Sample Size

+

By default, Frictionless uses the first 100 rows to detect field types. Increasing sample_size includes more rows in the sample, which makes the inference more accurate but slower.

+ +
+
+
from frictionless import Detector, describe
+
+detector = Detector(sample_size=1000)
+resource = describe("country-1.csv", detector=detector)
+print(resource.schema)
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'neighbor_id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'}]}
+ +
+

Encoding Function

+

By default, encoding_function is None and Frictionless relies on its built-in encoding detection, but you can implement your own encoding logic instead. The following example simply returns utf-8, but more complex logic can be added.

+ +
+
+
from frictionless import Detector, Resource
+
+detector = Detector(encoding_function=lambda sample: "utf-8")
+with Resource("table.csv", detector=detector) as resource:
+  print(resource.encoding)
+
+ +
utf-8
+ +
+

Field Type

+

This option allows manually setting all the field types to a given type. It's useful when you need to skip data casting (set the any type) or treat everything as a string (set the string type):

+ +
+
+
from frictionless import Detector, describe
+
+detector = Detector(field_type='string')
+resource = describe("country-1.csv", detector=detector)
+print(resource.schema)
+
+ +
{'fields': [{'name': 'id', 'type': 'string'},
+            {'name': 'neighbor_id', 'type': 'string'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'string'}]}
+ +
+

Field Names

+

Sometimes you don't want to use the existing header row to compose field names; it's possible to provide custom ones:

+ +
+
+
from frictionless import Detector, describe
+
+detector = Detector(field_names=["f1", "f2", "f3", "f4"])
+resource = describe("country-1.csv", detector=detector)
+print(resource.schema.field_names)
+
+ +
['f1', 'f2', 'f3', 'f4']
+ +
+

Field Confidence

+

By default, Frictionless uses a 0.9 (90%) confidence level for data type detection. This means that if a field contains nine integers and one string, it will be inferred as an integer field. If you want a guarantee that the inferred schema conforms to the data, set it to 1 (100%):

+ +
+
+
from frictionless import Detector, describe
+
+detector = Detector(field_confidence=1)
+resource = describe("country-1.csv", detector=detector)
+print(resource.schema)
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'neighbor_id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'}]}
+ +
+

Field Float Numbers

+

By default, Frictionless treats all non-integer numbers as Decimal. It's possible to parse them as float instead, which is a faster data type:

+ +
+
+
from frictionless import Detector, describe
+
+detector = Detector(field_float_numbers=True)
+resource = describe("floats.csv", detector=detector)
+print(resource.schema)
+print(resource.read_rows())
+
+ +
{'fields': [{'name': 'number', 'type': 'number', 'floatNumber': True}]}
+[{'number': 1.1}, {'number': 1.2}, {'number': 1.3}, {'number': 1.4}, {'number': 1.5}]
+ +
+

Field Missing Values

+

Missing values are an important concept in data description: they specify which cell values should be considered nulls. We can customize the defaults:

+ +
+
+
from frictionless import Detector, describe
+
+detector = Detector(field_missing_values=["", "1", "2"])
+resource = describe("table.csv", detector=detector)
+print(resource.schema.missing_values)
+print(resource.read_rows())
+
+ +
['', '1', '2']
+[{'id': None, 'name': 'english'}, {'id': None, 'name': '中国人'}]
+ +
+

As we can see, the textual values "1" and "2" are now considered nulls. Usually this is handy when you have data with values like '-', 'n/a', and similar.

+

Schema Sync

+

There is a way to sync the provided schema based on the header row's field order. It's very useful when you have a schema that describes a subset or a superset of the resource's fields:

+ +
+
+
from frictionless import Detector, Resource, Schema, fields
+
+# Note the order of the fields
+detector = Detector(schema_sync=True)
+schema = Schema(fields=[fields.StringField(name='name'), fields.IntegerField(name='id')])
+with Resource('table.csv', schema=schema, detector=detector) as resource:
+    print(resource.schema)
+    print(resource.read_rows())
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'}]}
+[{'id': 1, 'name': 'english'}, {'id': 2, 'name': '中国人'}]
+ +
+

Schema Patch

+

Sometimes we want to update only a few fields or schema properties without providing a brand-new schema. For example, the two examples above can be simplified as:

+ +
+
+
from frictionless import Detector, Resource
+
+detector = Detector(schema_patch={'fields': {'id': {'type': 'string'}}})
+with Resource('table.csv', detector=detector) as resource:
+    print(resource.schema)
+    print(resource.read_rows())
+
+ +
{'fields': [{'name': 'id', 'type': 'string'},
+            {'name': 'name', 'type': 'string'}]}
+[{'id': '1', 'name': 'english'}, {'id': '2', 'name': '中国人'}]
+ +
+

Reference

+
+ + +
+
+ +

Detector (class)

+ +
+
+ + +
+

Detector (class)

+

Detector representation. The main purpose of this class is to set the parameters that define how different aspects of metadata are detected.

+

Signature

+

(*, buffer_size: int = 10000, sample_size: int = 100, encoding_function: Optional[types.IEncodingFunction] = None, encoding_confidence: float = 0.5, field_type: Optional[str] = None, field_names: Optional[List[str]] = None, field_confidence: float = 0.9, field_float_numbers: bool = False, field_missing_values: List[str] = NOTHING, field_true_values: List[str] = NOTHING, field_false_values: List[str] = NOTHING, schema_sync: bool = False, schema_patch: Optional[Dict[str, Any]] = None) -> None

+

Parameters

+
    +
  • + buffer_size + (int)
  • +
  • + sample_size + (int)
  • +
  • + encoding_function + (Optional[types.IEncodingFunction])
  • +
  • + encoding_confidence + (float)
  • +
  • + field_type + (Optional[str])
  • +
  • + field_names + (Optional[List[str]])
  • +
  • + field_confidence + (float)
  • +
  • + field_float_numbers + (bool)
  • +
  • + field_missing_values + (List[str])
  • +
  • + field_true_values + (List[str])
  • +
  • + field_false_values + (List[str])
  • +
  • + schema_sync + (bool)
  • +
  • + schema_patch + (Optional[Dict[str, Any]])
  • +
+
+ +
+

detector.buffer_size (property)

+

+ The amount of bytes to be extracted as a buffer. It defaults to 10000. + The buffer_size can be increased to improve the inference accuracy to + detect file encoding. +

+

Signature

+

int

+
+
+

detector.sample_size (property)

+

+ The number of rows to be extracted as a sample for dialect/schema inference. It defaults to 100. The sample_size can be increased to improve the inference accuracy.

+

Signature

+

int

+
+
+

detector.encoding_function (property)

+

+ A custom encoding function for the file. +

+

Signature

+

Optional[types.IEncodingFunction]

+
+
+

detector.encoding_confidence (property)

+

+ Confidence value for encoding function. +

+

Signature

+

float

+
+
+

detector.field_type (property)

+

+ Enforce all the inferred types to be this type. + For more information, please check "Describing Data" guide. +

+

Signature

+

Optional[str]

+
+
+

detector.field_names (property)

+

+ Enforce all the inferred fields to have provided names. + For more information, please check "Describing Data" guide. +

+

Signature

+

Optional[List[str]]

+
+
+

detector.field_confidence (property)

+

+ A number from 0 to 1 setting the infer confidence. + If 1 the data is guaranteed to be valid against the inferred schema. + For more information, please check "Describing Data" guide. + It defaults to 0.9 +

+

Signature

+

float

+
+
+

detector.field_float_numbers (property)

+

+ Flag to indicate desired number type. + By default numbers will be `Decimal`; if `True` - `float`. + For more information, please check "Describing Data" guide. + It defaults to `False` +

+

Signature

+

bool

+
+
+

detector.field_missing_values (property)

+

+ Strings to be considered as missing values. For more information, please check "Describing Data" guide. It defaults to `['']`

+

Signature

+

List[str]

+
+
+

detector.field_true_values (property)

+

+ Strings to be considered as true values. For more information, please check "Describing Data" guide. It defaults to `["true", "True", "TRUE", "1"]`

+

Signature

+

List[str]

+
+
+

detector.field_false_values (property)

+

+ Strings to be considered as false values. For more information, please check "Describing Data" guide. It defaults to `["false", "False", "FALSE", "0"]`

+

Signature

+

List[str]

+
+
+

detector.schema_sync (property)

+

+ Whether to sync the schema. If set to `True`, the provided schema will be mapped to the inferred schema. This means, for example, that you can provide a subset of fields to be applied on top of the inferred fields, or that the provided schema can have a different order of fields.

+

Signature

+

bool

+
+
+

detector.schema_patch (property)

+

+ A dictionary to be used as an inferred schema patch. + The form of this dictionary should follow the Schema descriptor form + except for the `fields` property which should be a mapping with the + key named after a field name and the values being a field patch. + For more information, please check "Extracting Data" guide. +

+

Signature

+

Optional[Dict[str, Any]]

+
+ + +
+

detector.detect_dialect (method)

+

Detect dialect from sample

+

Signature

+

(sample: types.ISample, *, dialect: Optional[Dialect] = None) -> Dialect

+

Parameters

+
    +
  • + sample + (types.ISample): data sample
  • +
  • + dialect + (Optional[Dialect])
  • +
+
+
+

detector.detect_encoding (method)

+

Detect encoding from buffer

+

Signature

+

(buffer: types.IBuffer, *, encoding: Optional[str] = None) -> str

+

Parameters

+
    +
  • + buffer + (types.IBuffer): byte buffer
  • +
  • + encoding + (Optional[str])
  • +
+
+
+

Detector.detect_metadata_type (method) (static)

+

Return a descriptor type such as 'resource' or 'package'

+

Signature

+

(source: Any, *, format: Optional[str] = None) -> Optional[str]

+

Parameters

+
    +
  • + source + (Any)
  • +
  • + format + (Optional[str])
  • +
+
+
+

detector.detect_resource (method)

+

Detects path details

+

Signature

+

(resource: Resource) -> None

+

Parameters

+
    +
  • + resource + (Resource)
  • +
+
+
+

detector.detect_schema (method)

+

Detect schema from fragment

+

Signature

+

(fragment: types.IFragment, *, labels: Optional[List[str]] = None, schema: Optional[Schema] = None, field_candidates: List[Dict[str, Any]] = [{type: yearmonth}, {type: geopoint}, {type: duration}, {type: geojson}, {type: object}, {type: array}, {type: datetime}, {type: time}, {type: date}, {type: integer}, {type: number}, {type: boolean}, {type: year}, {type: string}]) -> Schema

+

Parameters

+
    +
  • + fragment + (types.IFragment): data fragment
  • +
  • + labels + (Optional[List[str]])
  • +
  • + schema + (Optional[Schema])
  • +
  • + field_candidates + (List[Dict[str, Any]])
  • +
+
\ No newline at end of file
diff --git a/docs/framework/dialect.html b/docs/framework/dialect.html
new file mode 100644
index 0000000000..33aab55f85
--- /dev/null
+++ b/docs/framework/dialect.html

Dialect Class

+

The Table Dialect is a core Frictionless Data concept: metadata describing a tabular data source. It gives us the ability to manage the table header and any details related to specific formats.

+

Dialect

+

Dialect class instances are accepted by many classes and functions:

+ +

You just need to create a Dialect instance with the desired options and pass it to the classes and functions listed above (see the sketch after the listing below). We will demonstrate with this example table:

+ +
+
+
cat capital-3.csv
+
+ +
id,name
+1,London
+2,Berlin
+3,Paris
+4,Madrid
+5,Rome
+ +
+
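Before going through the individual options, here is a minimal sketch of passing a dialect to extract (the option shown is illustrative):

from frictionless import Dialect, extract

# The dialect travels with the extract call and tweaks how data is read
dialect = Dialect(skip_blank_rows=True)
rows = extract("capital-3.csv", dialect=dialect)
print(rows)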

Header

+

It's a boolean flag which defaults to True indicating whether the data has a header row or not. In the following example the header row will be treated as a data row:

+ +
+
+
from frictionless import Resource, Dialect
+
+dialect = Dialect(header=False)
+with Resource('capital-3.csv', dialect=dialect) as resource:
+      print(resource.header.labels)
+      print(resource.to_view())
+
+ +
[]
++--------+----------+
+| field1 | field2   |
++========+==========+
+| 'id'   | 'name'   |
++--------+----------+
+| '1'    | 'London' |
++--------+----------+
+| '2'    | 'Berlin' |
++--------+----------+
+| '3'    | 'Paris'  |
++--------+----------+
+| '4'    | 'Madrid' |
++--------+----------+
+...
+ +
+

Header Rows

+

If header is True (the default), this parameter indicates where to find the header row, or the header rows for a multiline header. Let's see an example of how the first two data rows can be treated as part of the header:

+ +
+
+
from frictionless import Resource, Dialect
+
+dialect = Dialect(header_rows=[1, 2, 3])
+with Resource('capital-3.csv', dialect=dialect) as resource:
+    print(resource.header)
+    print(resource.to_view())
+
+ +
['id 1 2', 'name London Berlin']
++--------+--------------------+
+| id 1 2 | name London Berlin |
++========+====================+
+|      3 | 'Paris'            |
++--------+--------------------+
+|      4 | 'Madrid'           |
++--------+--------------------+
+|      5 | 'Rome'             |
++--------+--------------------+
+ +
+

Header Join

+

If there are multiple header rows (managed by the header_rows parameter), we can set a string separator for the header cell join operation. This is usually handy for "fancy" Excel files. For simplicity, we will demonstrate on a CSV file:

+ +
+
+
from frictionless import Resource, Dialect
+
+dialect = Dialect(header_rows=[1, 2, 3], header_join='/')
+with Resource('capital-3.csv', dialect=dialect) as resource:
+    print(resource.header)
+    print(resource.to_view())
+
+ +
['id/1/2', 'name/London/Berlin']
++--------+--------------------+
+| id/1/2 | name/London/Berlin |
++========+====================+
+|      3 | 'Paris'            |
++--------+--------------------+
+|      4 | 'Madrid'           |
++--------+--------------------+
+|      5 | 'Rome'             |
++--------+--------------------+
+ +
+

Header Case

+

By default, a header is validated in case-sensitive mode. To disable this behaviour, set the header_case parameter to False. This option is accepted by any Dialect, and a dialect can be passed to extract, validate, and other functions. Please note that it doesn't affect the resulting header; it only affects how it's validated:

+ +
+
+
from frictionless import Resource, Schema, Dialect, fields
+
+dialect = Dialect(header_case=False)
+schema = Schema(fields=[fields.StringField(name="ID"), fields.StringField(name="NAME")])
+with Resource('capital-3.csv', dialect=dialect, schema=schema) as resource:
+  print(f'Header: {resource.header}')
+  print(f'Valid: {resource.header.valid}')  # without "header_case" it will have 2 errors
+
+ +
Header: ['ID', 'NAME']
+Valid: True
+ +
+

Comment Char

+

Specifies the character used to mark commented rows:

+ +
+
+
from frictionless import Resource, Dialect
+
+dialect = Dialect(comment_char="#")
+with Resource(b'name\n#row1\nrow2', format="csv", dialect=dialect) as resource:
+    print(resource.read_rows())
+
+ +
[{'name': 'row2'}]
+ +
+

Comment Rows

+

A list of rows to ignore:

+ +
+
+
from frictionless import Resource, Dialect
+
+dialect = Dialect(comment_rows=[2])
+with Resource(b'name\nrow1\nrow2', format="csv", dialect=dialect) as resource:
+    print(resource.read_rows())
+
+ +
[{'name': 'row2'}]
+ +
+

Skip Blank Rows

+

Ignores rows if they are completely blank.

+ +
+
+
from frictionless import Resource, Dialect
+
+dialect = Dialect(skip_blank_rows=True)
+with Resource(b'name\n\nrow2', format="csv", dialect=dialect) as resource:
+    print(resource.read_rows())
+
+ +
[{'name': 'row2'}]
+ +
+

Reference

+
+ + +
+
+ +

Dialect (class)

+

Control (class)

+ +
+
+ + +
+

Dialect (class)

+

Dialect representation

+

Signature

+

(*, descriptor: Optional[Union[types.IDescriptor, str]] = None, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, header: bool = True, header_rows: List[int] = NOTHING, header_join: str = , header_case: bool = True, comment_char: Optional[str] = None, comment_rows: List[int] = NOTHING, skip_blank_rows: bool = False, controls: List[Control] = NOTHING) -> None

+

Parameters

+
    +
  • + descriptor + (Optional[Union[types.IDescriptor, str]])
  • +
  • + name + (Optional[str])
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + header + (bool)
  • +
  • + header_rows + (List[int])
  • +
  • + header_join + (str)
  • +
  • + header_case + (bool)
  • +
  • + comment_char + (Optional[str])
  • +
  • + comment_rows + (List[int])
  • +
  • + skip_blank_rows + (bool)
  • +
  • + controls + (List[Control])
  • +
+
+ +
+

dialect.descriptor (property)

+

+ # TODO: add docs +

+

Signature

+

Optional[Union[types.IDescriptor, str]]

+
+
+

dialect.name (property)

+

+ A short url-usable (and preferably human-readable) name. + This MUST be lower-case and contain only alphanumeric characters + along with “_” or “-” characters. +

+

Signature

+

Optional[str]

+
+
+

dialect.type (property)

+

+ Type of the object +

+

Signature

+

ClassVar[Union[str, None]]

+
+
+

dialect.title (property)

+

+ A human-oriented title for the Dialect. +

+

Signature

+

Optional[str]

+
+
+

dialect.description (property)

+

+ A brief description of the Dialect. +

+

Signature

+

Optional[str]

+
+
+

dialect.header (property)

+

+ If true, the header will be read; otherwise, the header will be skipped.

+

Signature

+

bool

+
+
+

dialect.header_rows (property)

+

+ Specifies the row numbers for the header. Default is [1]. +

+

Signature

+

List[int]

+
+
+

dialect.header_join (property)

+

+ Separator used to join the header cells of multiple header rows. The default value is " "; other values could be ":", "-", etc.

+

Signature

+

str

+
+
+

dialect.header_case (property)

+

+ If set to False, header matching is case-insensitive. The default value is True.

+

Signature

+

bool

+
+
+

dialect.comment_char (property)

+

+ Specifies char used to comment the rows. The default value is None. + For example: "#". +

+

Signature

+

Optional[str]

+
+
+

dialect.comment_rows (property)

+

+ A list of rows to ignore. For example: [1, 2] +

+

Signature

+

List[int]

+
+
+

dialect.skip_blank_rows (property)

+

+ Ignores rows if they are completely blank +

+

Signature

+

bool

+
+
+

dialect.controls (property)

+

+ A list of controls which defines different aspects of reading data. +

+

Signature

+

List[Control]

+
+ + +
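As a hedged sketch of how controls plug into a dialect (assuming the CsvControl class from the formats module):

from frictionless import Dialect, formats

# Attach a format-specific control and retrieve it by type
dialect = Dialect()
dialect.add_control(formats.CsvControl(delimiter=";"))
print(dialect.get_control("csv"))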
+

dialect.add_control (method)

+

Add a new control to the dialect

+

Signature

+

(control: Control) -> None

+

Parameters

+
    +
  • + control + (Control)
  • +
+
+
+

Dialect.describe (method) (static)

+

Describe the given source as a dialect

+

Signature

+

(source: Optional[Any] = None, **options: Any) -> Dialect

+

Parameters

+
    +
  • + source + (Optional[Any]): data source
  • +
  • + options + (Any)
  • +
+
+
+

dialect.get_control (method)

+

Get control by type

+

Signature

+

(type: str) -> Control

+

Parameters

+
    +
  • + type + (str)
  • +
+
+
+

dialect.has_control (method)

+

Check if control is present

+

Signature

+

(type: str)

+

Parameters

+
    +
  • + type + (str)
  • +
+
+
+

dialect.set_control (method)

+

Set control by type

+

Signature

+

(control: Control) -> Optional[Control]

+

Parameters

+
    +
  • + control + (Control)
  • +
+
+ + +
+

Control (class)

+

Control representation. This class is the base class for all the control classes, which are used to set the states of various components.

+

Signature

+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None) -> None

+

Parameters

+
    +
  • + name + (Optional[str])
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
+
+ +
+

control.name (property)

+

+ A short url-usable (and preferably human-readable) name. + This MUST be lower-case and contain only alphanumeric characters + along with “_” or “-” characters. +

+

Signature

+

Optional[str]

+
+
+

control.type (property)

+

+ Type of the control, such as a CSV control or a Zenodo plugin control. For example: "csv", "zenodo".

+

Signature

+

ClassVar[str]

+
+
+

control.title (property)

+

+ A human-oriented title for the control. +

+

Signature

+

Optional[str]

+
+
+

control.description (property)

+

+ A brief description of the control. +

+

Signature

+

Optional[str]

+
\ No newline at end of file
diff --git a/docs/framework/error.html b/docs/framework/error.html
new file mode 100644
index 0000000000..60d7f96eae
--- /dev/null
+++ b/docs/framework/error.html

Error Class

+

The Error class is pure metadata with no behavior. It's used to describe an error that happened while the Framework was working or during validation.

+

To create a custom error you basically just need to fill the required class fields:

+
from frictionless import errors
+
+class DuplicateRowError(errors.RowError):
+    code = "duplicate-row"
+    name = "Duplicate Row"
+    tags = ["#table", "#row", "#duplicate"]
+    template = "Row at position {rowPosition} is duplicated: {note}"
+    description = "The row is duplicated."
+
+

Reference

+
+ + +
+
+ +

Error (class)

+ +
+
+ + +
+

Error (class)

+

Error representation. It is the base class from which all other error classes are derived.

+

Signature

+

(*, note: str) -> None

+

Parameters

+
    +
  • + note + (str)
  • +
+
+ +
+

error.type (property)

+

+ A short machine-readable type of the error, such as "duplicate-row".

+

Signature

+

ClassVar[str]

+
+
+

error.title (property)

+

+ A human-readable title for the error.

+

Signature

+

ClassVar[str]

+
+
+

error.description (property)

+

+ A human-readable, comprehensive description of the error. It can be set to any custom text.

+

Signature

+

ClassVar[str]

+
+
+

error.template (property)

+

+ A message template used to render the error message; placeholders such as {rowPosition} and {note} are filled from the error's details.

+

Signature

+

ClassVar[str]

+
+
+

error.tags (property)

+

+ A list of tags categorizing the error, such as ["#table", "#row"].

+

Signature

+

ClassVar[List[str]]

+
+
+

error.message (property)

+

+ A human-readable message rendered from the template with the error's details filled in.

+

Signature

+

str

+
+
+

error.note (property)

+

+ A short human readable description of the error. It can be set to any custom text. +

+

Signature

+

str

+
\ No newline at end of file
diff --git a/docs/framework/inquiry.html b/docs/framework/inquiry.html
new file mode 100644
index 0000000000..05943e4f3a
--- /dev/null
+++ b/docs/framework/inquiry.html

Inquiry Class

+

The Inquiry gives you the ability to create arbitrary validation jobs containing a set of individual validation tasks.

+

Creating Inquiry

+

Let's create an inquiry that includes an individual file validation and a resource validation:

+ +
+
+
from frictionless import Inquiry
+
+inquiry = Inquiry.from_descriptor({'tasks': [
+  {'path': 'capital-valid.csv'},
+  {'path': 'capital-invalid.csv'},
+]})
+inquiry.to_yaml('capital.inquiry-example.yaml')
+print(inquiry)
+
+ +
{'tasks': [{'path': 'capital-valid.csv'}, {'path': 'capital-invalid.csv'}]}
+ +
+

Validating Inquiry

+

Tasks in the Inquiry accept the same arguments (written in camelCase) as the corresponding validate functions. As usual, let's run the validation:

+ +
+
+
frictionless validate capital.inquiry-example.yaml
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+                          dataset
+┏━━━━━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓
+┃ name            ┃ type  ┃ path                ┃ status  ┃
+┡━━━━━━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩
+│ capital-valid   │ table │ capital-valid.csv   │ VALID   │
+│ capital-invalid │ table │ capital-invalid.csv │ INVALID │
+└─────────────────┴───────┴─────────────────────┴─────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+                                capital-invalid
+┏━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
+┃ Row  ┃ Field ┃ Type            ┃ Message                                     ┃
+┡━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
+│ None │ 3     │ duplicate-label │ Label "name" in the header at position "3"  │
+│      │       │                 │ is duplicated to a label: at position "2"   │
+│ 10   │ 3     │ missing-cell    │ Row at position "10" has a missing cell in  │
+│      │       │                 │ field "name2" at position "3"               │
+│ 11   │ None  │ blank-row       │ Row at position "11" is completely blank    │
+│ 12   │ 1     │ type-error      │ Type error in the cell "x" in row "12" and  │
+│      │       │                 │ field "id" at position "1": type is         │
+│      │       │                 │ "integer/default"                           │
+│ 12   │ 4     │ extra-cell      │ Row at position "12" has an extra value in  │
+│      │       │                 │ field at position "4"                       │
+└──────┴───────┴─────────────────┴─────────────────────────────────────────────┘
+ +
+

At first sight, it's not clear why such a construct exists, but when your validation workflow gets complex, the Inquiry can provide a lot of flexibility and power. Last but not least, the Inquiry can use multiprocessing when more than one task is provided, as sketched below.

+
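A minimal sketch of running the same inquiry from Python with parallel execution (the parallel flag comes from the inquiry.validate signature in the reference below):

from frictionless import Inquiry

inquiry = Inquiry.from_descriptor('capital.inquiry-example.yaml')
report = inquiry.validate(parallel=True)  # tasks run via multiprocessing
print(report.valid)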

Reference

+
+ + +
+
+ +

Inquiry (class)

+

InquiryTask (class)

+ +
+
+ + +
+

Inquiry (class)

+

Inquiry representation.

+

Signature

+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, tasks: List[InquiryTask] = NOTHING) -> None

+

Parameters

+
    +
  • + name + (Optional[str])
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + tasks + (List[InquiryTask])
  • +
+
+ +
+

inquiry.name (property)

+

+ A short url-usable (and preferably human-readable) name. + This MUST be lower-case and contain only alphanumeric characters + along with “_” or “-” characters. +

+

Signature

+

Optional[str]

+
+
+

inquiry.type (property)

+

+ Type of the object +

+

Signature

+

ClassVar[Union[str, None]]

+
+
+

inquiry.title (property)

+

+ A human-oriented title for the Inquiry. +

+

Signature

+

Optional[str]

+
+
+

inquiry.description (property)

+

+ A brief description of the Inquiry. +

+

Signature

+

Optional[str]

+
+
+

inquiry.tasks (property)

+

+ List of underlying tasks to be validated.

+

Signature

+

List[InquiryTask]

+
+ + +
+

inquiry.validate (method)

+

Validate inquiry

+

Signature

+

(*, parallel: bool = False)

+

Parameters

+
    +
  • + parallel + (bool)
  • +
+
+ + +
+

InquiryTask (class)

+

Inquiry task representation.

+

Signature

+

(*, name: Optional[str] = None, type: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, path: Optional[str] = None, scheme: Optional[str] = None, format: Optional[str] = None, encoding: Optional[str] = None, mediatype: Optional[str] = None, compression: Optional[str] = None, extrapaths: Optional[List[str]] = None, innerpath: Optional[str] = None, dialect: Optional[Dialect] = None, schema: Optional[Schema] = None, checklist: Optional[Checklist] = None, resource: Optional[str] = None, package: Optional[str] = None) -> None

+

Parameters

+
    +
  • + name + (Optional[str])
  • +
  • + type + (Optional[str])
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + path + (Optional[str])
  • +
  • + scheme + (Optional[str])
  • +
  • + format + (Optional[str])
  • +
  • + encoding + (Optional[str])
  • +
  • + mediatype + (Optional[str])
  • +
  • + compression + (Optional[str])
  • +
  • + extrapaths + (Optional[List[str]])
  • +
  • + innerpath + (Optional[str])
  • +
  • + dialect + (Optional[Dialect])
  • +
  • + schema + (Optional[Schema])
  • +
  • + checklist + (Optional[Checklist])
  • +
  • + resource + (Optional[str])
  • +
  • + package + (Optional[str])
  • +
+
+ +
+

inquiryTask.name (property)

+

+ A short url-usable (and preferably human-readable) name. + This MUST be lower-case and contain only alphanumeric characters + along with “_” or “-” characters. +

+

Signature

+

Optional[str]

+
+
+

inquiryTask.type (property)

+

+ Type of the source to be validated such as "package", "resource" etc. +

+

Signature

+

Optional[str]

+
+
+

inquiryTask.title (property)

+

+ A human-oriented title for the Inquiry. +

+

Signature

+

Optional[str]

+
+
+

inquiryTask.description (property)

+

+ A brief description of the Inquiry. +

+

Signature

+

Optional[str]

+
+
+

inquiryTask.path (property)

+

+ Path to the data source. +

+

Signature

+

Optional[str]

+
+
+

inquiryTask.scheme (property)

+

+ Scheme for loading the file (file, http, ...). If not set, it'll be + inferred from `source`. +

+

Signature

+

Optional[str]

+
+
+

inquiryTask.format (property)

+

+ File source's format (csv, xls, ...). If not set, it'll be + inferred from `source`. +

+

Signature

+

Optional[str]

+
+
+

inquiryTask.encoding (property)

+

+ Source encoding. If not set, it'll be inferred from `source`. +

+

Signature

+

Optional[str]

+
+
+

inquiryTask.mediatype (property)

+

+ Mediatype/mimetype of the resource e.g. “text/csv”, or “application/vnd.ms-excel”. + Mediatypes are maintained by the Internet Assigned Numbers Authority (IANA) in a + media type registry. +

+

Signature

+

Optional[str]

+
+
+

inquiryTask.compression (property)

+

+ Source file compression (zip, ...). If not set, it'll be inferred from `source`. +

+

Signature

+

Optional[str]

+
+
+

inquiryTask.extrapaths (property)

+

+ List of paths to concatenate to the main path. It's used for multipart resources. +

+

Signature

+

Optional[List[str]]

+
+
+

inquiryTask.innerpath (property)

+

+ Path within the compressed file. It defaults to the first file in the archive + (if the source is an archive). +

+

Signature

+

Optional[str]

+
+
+

inquiryTask.dialect (property)

+

+ Specific set of formatting parameters applied while reading data source. + The parameters are set as a Dialect class. For more information, please + check the Dialect Class documentation. +

+

Signature

+

Optional[Dialect]

+
+
+

inquiryTask.schema (property)

+

+ Schema descriptor. A string descriptor or path to schema file. +

+

Signature

+

Optional[Schema]

+
+
+

inquiryTask.checklist (property)

+

+ Checklist class with a set of validation checks to be applied to the + data source being read. For more information, please check the + Validation Checks documentation. +

+

Signature

+

Optional[Checklist]

+
+
+

inquiryTask.resource (property)

+

+ Resource descriptor. A string descriptor or path to resource file. +

+

Signature

+

Optional[str]

+
+
+

inquiryTask.package (property)

+

+ Package descriptor. A string descriptor or path to package + file. +

+

Signature

+

Optional[str]

+
+ + + + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/framework/package.html b/docs/framework/package.html new file mode 100644 index 0000000000..6c2f7da43c --- /dev/null +++ b/docs/framework/package.html @@ -0,0 +1,4234 @@ + + + + + + + + +Package Class | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ Edit page in Livemark
+ (2023-07-18 13:48) +
+ +

Package Class

+

The Data Package is a core Frictionless Data concept: a set of resources with additional metadata. You can read the Data Package Standard for more information.

+

Creating Package

+

Let's create a data package:

+ +
+
+
from frictionless import Package, Resource
+
+package = Package('table.csv') # from a resource path
+package = Package('tables/*') # from a resources glob
+package = Package(['tables/chunk1.csv', 'tables/chunk2.csv']) # from a list
+package = Package('package/datapackage.json') # from a descriptor path
+package = Package({'resources': [{'path': 'table.csv'}]}) # from a descriptor
+package = Package(resources=[Resource(path='table.csv')]) # from arguments
+
+ +
+

As you can see, it's possible to create a package from different kinds of sources, whose types are detected automatically (e.g. whether it's a glob or a path). It's possible to make this step more explicit:

+ +
+
+
from frictionless import Package, Resource
+
+package = Package(resources=[Resource(path='table.csv')]) # from arguments
+package = Package('datapackage.json') # from a descriptor
+
+ +
+

Describing Package

+

The standards support a great deal of package metadata, which Frictionless Framework supports as well:

+ +
+
+
from frictionless import Package, Resource
+
+package = Package(
+    name='package',
+    title='My Package',
+    description='My Package for the Guide',
+    resources=[Resource(path='table.csv')],
+    # it's possible to provide all the official properties like homepage, version, etc
+)
+print(package)
+
+ +
{'name': 'package',
+ 'title': 'My Package',
+ 'description': 'My Package for the Guide',
+ 'resources': [{'name': 'table',
+                'type': 'table',
+                'path': 'table.csv',
+                'scheme': 'file',
+                'format': 'csv',
+                'mediatype': 'text/csv'}]}
+ +
+

If you have created a package, for example from a descriptor, you can access these properties:

+ +
+
+
from frictionless import Package
+
+package = Package('datapackage.json')
+print(package.name)
+# and others
+
+ +
test-tabulator
+ +
+

And edit them:

+ +
+
+
from frictionless import Package
+
+package = Package('datapackage.json')
+package.name = 'new-name'
+package.title = 'New Title'
+package.description = 'New Description'
+# and others
+print(package)
+
+ +
{'name': 'new-name',
+ 'title': 'New Title',
+ 'description': 'New Description',
+ 'resources': [{'name': 'first-resource',
+                'type': 'table',
+                'path': 'table.xls',
+                'scheme': 'file',
+                'format': 'xls',
+                'mediatype': 'application/vnd.ms-excel',
+                'schema': {'fields': [{'name': 'id', 'type': 'number'},
+                                      {'name': 'name', 'type': 'string'}]}},
+               {'name': 'number-two',
+                'type': 'table',
+                'path': 'table-reverse.csv',
+                'scheme': 'file',
+                'format': 'csv',
+                'mediatype': 'text/csv',
+                'schema': {'fields': [{'name': 'id', 'type': 'integer'},
+                                      {'name': 'name', 'type': 'string'}]}}]}
+ +
+

Resource Management

+

The core purpose of a package is to hold a set of resources. The Package class provides useful methods to manage them:

+ +
+
+
from frictionless import Package, Resource
+
+package = Package('datapackage.json')
+print(package.resources)
+print(package.resource_names)
+package.add_resource(Resource(name='new', data=[['key1', 'key2'], ['val1', 'val2']]))
+resource = package.get_resource('new')
+print(package.has_resource('new'))
+package.remove_resource('new')
+
+ +
[{'name': 'first-resource',
+ 'type': 'table',
+ 'path': 'table.xls',
+ 'scheme': 'file',
+ 'format': 'xls',
+ 'mediatype': 'application/vnd.ms-excel',
+ 'schema': {'fields': [{'name': 'id', 'type': 'number'},
+                       {'name': 'name', 'type': 'string'}]}}, {'name': 'number-two',
+ 'type': 'table',
+ 'path': 'table-reverse.csv',
+ 'scheme': 'file',
+ 'format': 'csv',
+ 'mediatype': 'text/csv',
+ 'schema': {'fields': [{'name': 'id', 'type': 'integer'},
+                       {'name': 'name', 'type': 'string'}]}}]
+['first-resource', 'number-two']
+True
+ +
+

Saving Descriptor

+

Like any Metadata class, the Package class can be saved as JSON or YAML:

+ +
+
+
from frictionless import Package
+package = Package('tables/*')
+package.to_json('datapackage.json') # Save as JSON
+package.to_yaml('datapackage.yaml') # Save as YAML
+
+ +
+
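The reference below also documents a publish method; here is a hedged sketch (the target URL is the illustrative one from the reference, and in practice a control with credentials is usually required):

from frictionless import Package

package = Package('datapackage.json')
# Publish the package to a supported data portal
result = package.publish('https://github.com/frictionlessdata/repository-demo')
print(result)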

Reference

+
+ + +
+
+ +

Package (class)

+ +
+
+ + +
+

Package (class)

+

Package representation. This class is one of the cornerstones of the Frictionless framework. It manages the underlying resources and provides the ability to describe a package.

```python
package = Package(resources=[Resource(path="data/table.csv")])
package.get_resource('table').read_rows() == [
    {'id': 1, 'name': 'english'},
    {'id': 2, 'name': '中国人'},
]
```

+

Signature

+

(*, source: Optional[Any] = None, control: Optional[Control] = None, basepath: Optional[str] = None, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, homepage: Optional[str] = None, profile: Optional[str] = None, licenses: List[Dict[str, Any]] = NOTHING, sources: List[Dict[str, Any]] = NOTHING, contributors: List[Dict[str, Any]] = NOTHING, keywords: List[str] = NOTHING, image: Optional[str] = None, version: Optional[str] = None, created: Optional[str] = None, resources: List[Resource] = NOTHING, dataset: Optional[Dataset] = None, dialect: Optional[Dialect] = None, detector: Optional[Detector] = None) -> None

+

Parameters

+
    +
  • + source + (Optional[Any])
  • +
  • + control + (Optional[Control])
  • +
  • + basepath + (Optional[str])
  • +
  • + name + (Optional[str])
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + homepage + (Optional[str])
  • +
  • + profile + (Optional[str])
  • +
  • + licenses + (List[Dict[str, Any]])
  • +
  • + sources + (List[Dict[str, Any]])
  • +
  • + contributors + (List[Dict[str, Any]])
  • +
  • + keywords + (List[str])
  • +
  • + image + (Optional[str])
  • +
  • + version + (Optional[str])
  • +
  • + created + (Optional[str])
  • +
  • + resources + (List[Resource])
  • +
  • + dataset + (Optional[Dataset])
  • +
  • + dialect + (Optional[Dialect])
  • +
  • + detector + (Optional[Detector])
  • +
+
+ +
+

package.source (property)

+

+ # TODO: add docs +

+

Signature

+

Optional[Any]

+
+
+

package.control (property)

+

+ # TODO: add docs +

+

Signature

+

Optional[Control]

+
+
+

package._basepath (property)

+

+ # TODO: add docs +

+

Signature

+

Optional[str]

+
+
+

package.name (property)

+

+ A short url-usable (and preferably human-readable) name. + This MUST be lower-case and contain only alphanumeric characters + along with “.”, “_” or “-” characters. +

+

Signature

+

Optional[str]

+
+
+

package.type (property)

+

+ Type of the package +

+

Signature

+

ClassVar[Union[str, None]]

+
+
+

package.title (property)

+

+ A Package title according to the specs. It should be a human-oriented title of the package.

+

Signature

+

Optional[str]

+
+
+

package.description (property)

+

+ A Package description according to the specs. It should be a human-oriented description of the package.

+

Signature

+

Optional[str]

+
+
+

package.homepage (property)

+

+ A URL for the home on the web that is related to this package. + For example, github repository or ckan dataset address. +

+

Signature

+

Optional[str]

+
+
+

package.profile (property)

+

+ A fully-qualified URL that points directly to a JSON Schema + that can be used to validate the descriptor +

+

Signature

+

Optional[str]

+
+
+

package.licenses (property)

+

+ The license(s) under which the package is provided. +

+

Signature

+

List[Dict[str, Any]]

+
+
+

package.sources (property)

+

+ The raw sources for this data package. + It MUST be an array of Source objects. + Each Source object MUST have a title and + MAY have path and/or email properties. +

+

Signature

+

List[Dict[str, Any]]

+
+
+

package.contributors (property)

+

+ The people or organizations who contributed to this package. + It MUST be an array. Each entry is a Contributor and MUST be an object. + A Contributor MUST have a title property and MAY contain + path, email, role and organization properties. +

+

Signature

+

List[Dict[str, Any]]

+
+
+

package.keywords (property)

+

+ An Array of string keywords to assist users searching. + For example, ['data', 'fiscal'] +

+

Signature

+

List[str]

+
+
+

package.image (property)

+

+ An image to use for this data package. + For example, when showing the package in a listing. +

+

Signature

+

Optional[str]

+
+
+

package.version (property)

+

+ A version string identifying the version of the package. + It should conform to the Semantic Versioning requirements and + should follow the Data Package Version pattern. +

+

Signature

+

Optional[str]

+
+
+

package.created (property)

+

+ The datetime on which this was created. + The datetime must conform to the string formats for RFC3339 datetime, +

+

Signature

+

Optional[str]

+
+
+

package.resources (property)

+

+ A list of resource descriptors. + It can be dicts or Resource instances +

+

Signature

+

List[Resource]

+
+
+

package.dataset (property)

+

+ A reference to the dataset of the catalog that the package is part of. If the package is not part of any catalog, it is set to None.

+

Signature

+

Optional[Dataset]

+
+
+

package._dialect (property)

+

+ # TODO: add docs +

+

Signature

+

Optional[Dialect]

+
+
+

package._detector (property)

+

+ # TODO: add docs +

+

Signature

+

Optional[Detector]

+
+ +
+

package.basepath (property)

+

A basepath of the package. The normpath of a resource is `basepath` joined with the resource's `path`.

+

Signature

+

Optional[str]

+
+
+

package.resource_names (property)

+

Return names of resources

+

Signature

+

List[str]

+
+
+

package.resource_paths (property)

+

Return paths of resources

+

Signature

+

List[str]

+
+ +
+

package.add_resource (method)

+

Add new resource to the package

+

Signature

+

(resource: Union[Resource, str]) -> Resource

+

Parameters

+
    +
  • + resource + (Union[Resource, str])
  • +
+
+
+

package.analyze (method)

+

Analyze the resources of the package. This feature is currently experimental, and its API may change without warning.

+

Signature

+

(*, detailed: bool = False)

+

Parameters

+
    +
  • + detailed + (bool)
  • +
+
+
+

package.clear_resources (method)

+

Remove all the resources

+
+
+

package.dereference (method)

+

Dereference underlying metadata. If some of the underlying metadata is provided as a string, it will be replaced by the metadata object.

+
+
+

Package.describe (method) (static)

+

Describe the given source as a package

+

Signature

+

(source: Optional[Any] = None, *, stats: bool = False, **options: Any)

+

Parameters

+
    +
  • + source + (Optional[Any]): data source
  • +
  • + stats + (bool)
  • +
  • + options + (Any)
  • +
+
+
+

package.extract (method)

+

Extract rows

+

Signature

+

(*, name: Optional[str] = None, filter: Optional[types.IFilterFunction] = None, process: Optional[types.IProcessFunction] = None, limit_rows: Optional[int] = None) -> types.ITabularData

+

Parameters

+
    +
  • + name + (Optional[str])
  • +
  • + filter + (Optional[types.IFilterFunction]): row filter function
  • +
  • + process + (Optional[types.IProcessFunction]): row processor function
  • +
  • + limit_rows + (Optional[int]): limit amount of rows to this number
  • +
+
+
+

package.flatten (method)

+

Flatten the package. Parameters: spec (str[]): flatten specification.

+

Signature

+

(spec: List[str] = [name, path])

+

Parameters

+
    +
  • + spec + (List[str])
  • +
+
+
+

package.get_resource (method)

+

Get resource by name

+

Signature

+

(name: str) -> Resource

+

Parameters

+
    +
  • + name + (str)
  • +
+
+
+

package.get_table_resource (method)

+

Get table resource by name (raise if not table)

+

Signature

+

(name: str) -> TableResource

+

Parameters

+
    +
  • + name + (str)
  • +
+
+
+

package.has_resource (method)

+

Check if a resource is present

+

Signature

+

(name: str) -> bool

+

Parameters

+
    +
  • + name + (str)
  • +
+
+
+

package.has_table_resource (method)

+

Check if a table resource is present

+

Signature

+

(name: str) -> bool

+

Parameters

+
    +
  • + name + (str)
  • +
+
+
+

package.infer (method)

+

Infer metadata

+

Signature

+

(*, stats: bool = False) -> None

+

Parameters

+
    +
  • + stats + (bool): stream files completely and infer stats
  • +
+
+
+

package.publish (method)

+

Publish package to any supported data portal

+

Signature

+

(target: Any = None, *, control: Optional[Control] = None) -> models.PublishResult

+

Parameters

+
    +
  • + target + (Any): url e.g. "https://github.com/frictionlessdata/repository-demo" of target[CKAN/Github...]
  • +
  • + control + (Optional[Control]): Github control
  • +
+
+
+

package.remove_resource (method)

+

Remove resource by name

+

Signature

+

(name: str) -> Resource

+

Parameters

+
    +
  • + name + (str)
  • +
+
+
+

package.set_resource (method)

+

Set resource by name

+

Signature

+

(resource: Resource) -> Optional[Resource]

+

Parameters

+
    +
  • + resource + (Resource)
  • +
+
+
+

package.to_copy (method)

+

Create a copy of the package

+

Signature

+

(**options: Any) -> Self

+

Parameters

+
    +
  • + options + (Any)
  • +
+
+
+

package.to_er_diagram (method)

+

Generate an ERD (Entity Relationship Diagram) from package resources and export it as a .dot file. Based on: https://github.com/frictionlessdata/frictionless-py/issues/1118

+

Signature

+

(path: Optional[str] = None) -> str

+

Parameters

+
    +
  • + path + (Optional[str]): target path
  • +
+
+
+

package.transform (method)

+

Transform package

+

Signature

+

(: Package, pipeline: Pipeline)

+

Parameters

+
    +
  • + pipeline + (Pipeline)
  • +
+
+
+

package.update_resource (method)

+

Update resource

+

Signature

+

(name: str, descriptor: types.IDescriptor) -> Resource

+

Parameters

+
    +
  • + name + (str)
  • +
  • + descriptor + (types.IDescriptor)
  • +
+
+
+

package.validate (method)

+

Validate package

+

Signature

+

(: Package, checklist: Optional[Checklist] = None, *, name: Optional[str] = None, parallel: bool = False, limit_rows: Optional[int] = None, limit_errors: int = 1000)

+

Parameters

+
    +
  • + checklist + (Optional[Checklist])
  • +
  • + name + (Optional[str])
  • +
  • + parallel + (bool)
  • +
  • + limit_rows + (Optional[int])
  • +
  • + limit_errors + (int)
  • +
+
\ No newline at end of file
diff --git a/docs/framework/pipeline.html b/docs/framework/pipeline.html
new file mode 100644
index 0000000000..afae345e50
--- /dev/null
+++ b/docs/framework/pipeline.html

Pipeline Class

+

A Pipeline is an object containing a list of transformation steps.

+

Creating Pipeline

+

Let's create a pipeline using Python interface:

+ +
+
+
from frictionless import Pipeline, transform, steps
+
+pipeline = Pipeline(steps=[steps.table_normalize(), steps.table_melt(field_name='name')])
+print(pipeline)
+
+ +
{'steps': [{'type': 'table-normalize'},
+           {'type': 'table-melt', 'fieldName': 'name'}]}
+ +
+

Running Pipeline

+

To run a pipeline you need to use a transform function or method:

+ +
+
+
from frictionless import Pipeline, transform, steps
+
+pipeline = Pipeline(steps=[steps.table_normalize(), steps.table_melt(field_name='name')])
+resource = transform('table.csv', pipeline=pipeline)
+print(resource.schema)
+print(resource.read_rows())
+
+ +
{'fields': [{'name': 'name', 'type': 'string'},
+            {'name': 'variable', 'type': 'string'},
+            {'name': 'value', 'type': 'any'}]}
+[{'name': 'english', 'variable': 'id', 'value': 1}, {'name': '中国人', 'variable': 'id', 'value': 2}]
+ +
+

Transform Steps

+

The Step concept is a part of the Transform API. You can create a custom Step to be used as part of resource or package transformation.

+
+

This step uses PETL under the hood.

+
+
from frictionless import Step
+
+class cell_set(Step):
+    code = "cell-set"
+
+    def __init__(self, descriptor=None, *, value=None, field_name=None):
+        self.setinitial("value", value)
+        self.setinitial("fieldName", field_name)
+        super().__init__(descriptor)
+
+    def transform_resource(self, resource):
+        value = self.get("value")
+        field_name = self.get("fieldName")
+        yield from resource.to_petl().update(field_name, value)
+
+
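Assuming the cell_set class defined above, a hypothetical usage could look like this:

from frictionless import Pipeline, transform

# The custom step participates in a pipeline like any built-in step
pipeline = Pipeline(steps=[cell_set(value="x", field_name="name")])
resource = transform("table.csv", pipeline=pipeline)
print(resource.read_rows())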

Reference

+
+ + +
+
+ +

Pipeline (class)

+

Step (class)

+ +
+
+ + +
+

Pipeline (class)

+

Pipeline representation

+

Signature

+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, steps: List[Step] = NOTHING) -> None

+

Parameters

+
    +
  • + name + (Optional[str])
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + steps + (List[Step])
  • +
+
+ +
+

pipeline.name (property)

+

+ A short url-usable (and preferably human-readable) name. + This MUST be lower-case and contain only alphanumeric characters + along with “_” or “-” characters. +

+

Signature

+

Optional[str]

+
+
+

pipeline.type (property)

+

+ Type of the pipeline

+

Signature

+

ClassVar[Union[str, None]]

+
+
+

pipeline.title (property)

+

+ A human-oriented title for the Pipeline. +

+

Signature

+

Optional[str]

+
+
+

pipeline.description (property)

+

+ A brief description of the Pipeline. +

+

Signature

+

Optional[str]

+
+
+

pipeline.steps (property)

+

+ List of transformation steps to apply. +

+

Signature

+

List[Step]

+
+ +
+

pipeline.step_types (property)

+

Return the list of step types

+

Signature

+

List[str]

+
+ +
+

pipeline.add_step (method)

+

Add a new step to the pipeline

+

Signature

+

(step: Step) -> None

+

Parameters

+
    +
  • + step + (Step)
  • +
+
+
+

pipeline.clear_steps (method)

+

Remove all the steps

+

Signature

+

() -> None

+
+
+

pipeline.get_step (method)

+

Get step by type

+

Signature

+

(type: str) -> Step

+

Parameters

+
    +
  • + type + (str)
  • +
+
+
+

pipeline.has_step (method)

+

Check if a step is present

+

Signature

+

(type: str) -> bool

+

Parameters

+
    +
  • + type + (str)
  • +
+
+
+

pipeline.remove_step (method)

+

Remove step by type

+

Signature

+

(type: str) -> Step

+

Parameters

+
    +
  • + type + (str)
  • +
+
+
+

pipeline.set_step (method)

+

Set step by type

+

Signature

+

(step: Step) -> Optional[Step]

+

Parameters

+
    +
  • + step + (Step)
  • +
+
+ + +
+

Step (class)

+

Step representation. + +A base class for all the step subclasses.

+

Signature

+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None) -> None

+

Parameters

+
    +
  • + name + (Optional[str])
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
+
+ +
+

step.name (property)

+

+ A short url-usable (and preferably human-readable) name. + This MUST be lower-case and contain only alphanumeric characters + along with “_” or “-” characters. +

+

Signature

+

Optional[str]

+
+
+

step.type (property)

+

+ A short url-usable (and preferably human-readable) name/type. + This MUST be lower-case and contain only alphanumeric characters + along with “_” or “-” characters. For example: "cell-fill". +

+

Signature

+

ClassVar[str]

+
+
+

step.title (property)

+

+ A human-oriented title for the Step. +

+

Signature

+

Optional[str]

+
+
+

step.description (property)

+

+ A brief description of the Step. +

+

Signature

+

Optional[str]

+
+ + +
+

step.transform_package (method)

+

Transform package

+

Signature

+

(package: Package)

+

Parameters

+
    +
  • + package + (Package): package
  • +
+
+
+

step.transform_resource (method)

+

Transform resource

+

Signature

+

(resource: Resource)

+

Parameters

+
    +
  • + resource + (Resource): resource
  • +
+
+ + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/framework/report.html b/docs/framework/report.html new file mode 100644 index 0000000000..b1ed2325c2 --- /dev/null +++ b/docs/framework/report.html @@ -0,0 +1,4027 @@ + + + + + + + + +Report Class | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ Edit page in Livemark
+ (2023-07-18 13:48) +
+ +

Report Class

+

Validation Report

+

All the validate functions return the Validation Report. It's a unified object containing information about a validation: source details, found errors, etc. Let's explore a report:

+ +
+
+
from frictionless import validate
+
+report = validate('capital-invalid.csv', pick_errors=['duplicate-label'])
+print(report)
+
+ +
{'valid': False,
+ 'stats': {'tasks': 1, 'errors': 1, 'warnings': 0, 'seconds': 0.011},
+ 'warnings': [],
+ 'errors': [],
+ 'tasks': [{'name': 'capital-invalid',
+            'type': 'table',
+            'valid': False,
+            'place': 'capital-invalid.csv',
+            'labels': ['id', 'name', 'name'],
+            'stats': {'errors': 1,
+                      'warnings': 0,
+                      'seconds': 0.011,
+                      'md5': 'dcdeae358cfd50860c18d953e021f836',
+                      'sha256': '95cc611e3b2457447ce62721a9b79d1a063d82058fc144d6d2a8dda53f30c3a6',
+                      'bytes': 171,
+                      'fields': 3,
+                      'rows': 11},
+            'warnings': [],
+            'errors': [{'type': 'duplicate-label',
+                        'title': 'Duplicate Label',
+                        'description': 'Two columns in the header row have the '
+                                       'same value. Column names should be '
+                                       'unique.',
+                        'message': 'Label "name" in the header at position "3" '
+                                   'is duplicated to a label: at position "2"',
+                        'tags': ['#table', '#header', '#label'],
+                        'note': 'at position "2"',
+                        'labels': ['id', 'name', 'name'],
+                        'rowNumbers': [1],
+                        'label': 'name',
+                        'fieldName': 'name2',
+                        'fieldNumber': 3}]}]}
+ +
+

As we can see, there is a lot of information; you can find its detailed description in "API Reference". Errors are grouped by tables; for some validations there can be dozens of tables. Let's use the report.flatten function to simplify the error representation:

+ +
+
+
from pprint import pprint
+from frictionless import validate
+
+report = validate('capital-invalid.csv', pick_errors=['duplicate-label'])
+pprint(report.flatten(['rowNumber', 'fieldNumber', 'code', 'message']))
+
+ +
[[None,
+  3,
+  None,
+  'Label "name" in the header at position "3" is duplicated to a label: at '
+  'position "2"']]
+ +
+

In some situations, an error can't be associated with a table; in that case, it goes to the top-level report.errors property:

+ +
+
+
from frictionless import validate
+
+report = validate('bad.json', type='schema')
+print(report)
+
+ +
{'valid': False,
+ 'stats': {'tasks': 1, 'errors': 1, 'warnings': 0, 'seconds': 0.0},
+ 'warnings': [],
+ 'errors': [],
+ 'tasks': [{'name': 'bad',
+            'type': 'json',
+            'valid': False,
+            'place': 'bad.json',
+            'labels': [],
+            'stats': {'errors': 1, 'warnings': 0, 'seconds': 0.0},
+            'warnings': [],
+            'errors': [{'type': 'schema-error',
+                        'title': 'Schema Error',
+                        'description': 'Provided schema is not valid.',
+                        'message': 'Schema is not valid: cannot retrieve '
+                                   'metadata "bad.json" because "[Errno 2] No '
+                                   'such file or directory: \'bad.json\'"',
+                        'tags': [],
+                        'note': 'cannot retrieve metadata "bad.json" because '
+                                '"[Errno 2] No such file or directory: '
+                                '\'bad.json\'"'}]}]}
+ +
+
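In scripts it's often enough to branch on the top-level validity flag and only drill into errors when needed; a minimal sketch using the report.valid and report.flatten members documented below:

from frictionless import validate
+
+report = validate('capital-invalid.csv')
+if not report.valid:
+    for error in report.flatten(['type', 'message']):
+        print(error)
+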

Validation Errors

+

The Error object is at the heart of the validation process. The Report has report.errors and report.tasks[].errors properties that can contain the Error object. Let's explore it:

+ +
+
+
from frictionless import validate
+
+report = validate('capital-invalid.csv', pick_errors=['duplicate-label'])
+error = report.task.error # it's only available for 1 table / 1 error situation
+print(f'Type: "{error.type}"')
+print(f'Title: "{error.title}"')
+print(f'Tags: "{error.tags}"')
+print(f'Note: "{error.note}"')
+print(f'Message: "{error.message}"')
+print(f'Description: "{error.description}"')
+
+ +
Type: "duplicate-label"
+Title: "Duplicate Label"
+Tags: "['#table', '#header', '#label']"
+Note: "at position "2""
+Message: "Label "name" in the header at position "3" is duplicated to a label: at position "2""
+Description: "Two columns in the header row have the same value. Column names should be unique."
+ +
+

Above, we have listed universal error properties. Depending on the type of error, there can be additional ones. For example, for our duplicate-label error:

+ +
+
+
from frictionless import validate
+
+report = validate('capital-invalid.csv', pick_errors=['duplicate-label'])
+error = report.task.error # it's only available for 1 table / 1 error situation
+print(error)
+
+ +
{'type': 'duplicate-label',
+ 'title': 'Duplicate Label',
+ 'description': 'Two columns in the header row have the same value. Column '
+                'names should be unique.',
+ 'message': 'Label "name" in the header at position "3" is duplicated to a '
+            'label: at position "2"',
+ 'tags': ['#table', '#header', '#label'],
+ 'note': 'at position "2"',
+ 'labels': ['id', 'name', 'name'],
+ 'rowNumbers': [1],
+ 'label': 'name',
+ 'fieldName': 'name2',
+ 'fieldNumber': 3}
+ +
+

Please explore "Errors Reference" to learn about all the available errors and their properties.

+
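For a human-readable overview of a report there is also the to_summary method listed in the reference below; a short sketch:

from frictionless import validate
+
+report = validate('capital-invalid.csv')
+print(report.to_summary())
+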

Reference

+
+ + +
+
+ +

Report (class)

+

ReportTask (class)

+ +
+
+ + +
+

Report (class)

+

Report representation. + +A class that stores the summary of the validation action.

+

Signature

+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, valid: bool, stats: types.IReportStats, warnings: List[str] = NOTHING, errors: List[Error] = NOTHING, tasks: List[ReportTask] = NOTHING) -> None

+

Parameters

+
    +
  • + name + (Optional[str])
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + valid + (bool)
  • +
  • + stats + (types.IReportStats)
  • +
  • + warnings + (List[str])
  • +
  • + errors + (List[Error])
  • +
  • + tasks + (List[ReportTask])
  • +
+
+ +
+

report.name (property)

+

+ A short url-usable (and preferably human-readable) name. + This MUST be lower-case and contain only alphanumeric characters + along with “_” or “-” characters. +

+

Signature

+

Optional[str]

+
+
+

report.type (property)

+

+ Type of the report +

+

Signature

+

ClassVar[Union[str, None]]

+
+
+

report.title (property)

+

+ A human-oriented title for the Report. +

+

Signature

+

Optional[str]

+
+
+

report.description (property)

+

+ A brief description of the Report. +

+

Signature

+

Optional[str]

+
+
+

report.valid (property)

+

+ Flag to specify if the data is valid or not. +

+

Signature

+

bool

+
+
+

report.stats (property)

+

+ Additional statistics of the data as defined in Stats class. +

+

Signature

+

types.IReportStats

+
+
+

report.warnings (property)

+

+ List of warnings raised while validating the data. +

+

Signature

+

List[str]

+
+
+

report.errors (property)

+

+ List of errors raised while validating the data. +

+

Signature

+

List[Error]

+
+
+

report.tasks (property)

+

+ List of task that were applied during data validation. +

+

Signature

+

List[ReportTask]

+
+ +
+

report.error (property)

+

Validation error (if there is only one)

+
+
+

report.task (property)

+

Validation task (if there is only one)

+
+ +
+

report.flatten (method)

+

Flatten the report + +Parameters + spec (str[]): flatten specification

+

Signature

+

(spec: List[str] = [taskNumber, rowNumber, fieldNumber, type])

+

Parameters

+
    +
  • + spec + (List[str])
  • +
+
+
+

Report.from_validation (method) (static)

+

Create a report from a validation

+

Signature

+

(*, time: float = 0, tasks: List[ReportTask] = [], errors: List[Error] = [], warnings: List[str] = [])

+

Parameters

+
    +
  • + time + (float)
  • +
  • + tasks + (List[ReportTask])
  • +
  • + errors + (List[Error])
  • +
  • + warnings + (List[str])
  • +
+
+
+

Report.from_validation_reports (method) (static)

+

Create a report from a set of validation reports

+

Signature

+

(*, time: float, reports: List[Report])

+

Parameters

+
    +
  • + time + (float)
  • +
  • + reports + (List[Report])
  • +
+
+
+

Report.from_validation_task (method) (static)

+

Create a report from a validation task

+

Signature

+

(resource: Resource, *, time: float, labels: List[str] = [], errors: List[Error] = [], warnings: List[str] = [])

+

Parameters

+
    +
  • + resource + (Resource)
  • +
  • + time + (float)
  • +
  • + labels + (List[str])
  • +
  • + errors + (List[Error])
  • +
  • + warnings + (List[str])
  • +
+
+
+

report.to_summary (method)

+

Summary of the report

+
+ + +
+

ReportTask (class)

+

Report task representation.

+

Signature

+

(*, name: str, type: Optional[str], title: Optional[str] = None, description: Optional[str] = None, valid: bool, place: str, labels: List[str], stats: types.IReportTaskStats, warnings: List[str] = NOTHING, errors: List[Error] = NOTHING) -> None

+

Parameters

+
    +
  • + name + (str)
  • +
  • + type + (Optional[str])
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + valid + (bool)
  • +
  • + place + (str)
  • +
  • + labels + (List[str])
  • +
  • + stats + (types.IReportTaskStats)
  • +
  • + warnings + (List[str])
  • +
  • + errors + (List[Error])
  • +
+
+ +
+

reportTask.name (property)

+

+ A short url-usable (and preferably human-readable) name. + This MUST be lower-case and contain only alphanumeric characters + along with “_” or “-” characters. +

+

Signature

+

str

+
+
+

reportTask.type (property)

+

+ Sets the property tabular to True if the type is "table". +

+

Signature

+

Optional[str]

+
+
+

reportTask.title (property)

+

+ A human-oriented title for the Report. +

+

Signature

+

Optional[str]

+
+
+

reportTask.description (property)

+

+ A brief description of the ReportTask. +

+

Signature

+

Optional[str]

+
+
+

reportTask.valid (property)

+

+ Flag to specify if the data is valid or not. +

+

Signature

+

bool

+
+
+

reportTask.place (property)

+

+ Specifies the place of the file. For example: "", "data/table.csv" etc. +

+

Signature

+

str

+
+
+

reportTask.labels (property)

+

+ List of labels of the task resource. +

+

Signature

+

List[str]

+
+
+

reportTask.stats (property)

+

+ Additional statistics of the data as defined in Stats class. +

+

Signature

+

types.IReportTaskStats

+
+
+

reportTask.warnings (property)

+

+ List of warnings raised while validating the data. +

+

Signature

+

List[str]

+
+
+

reportTask.errors (property)

+

+ List of errors raised while validating the data. +

+

Signature

+

List[Error]

+
+ +
+

reportTask.error (property)

+

Validation error if there is only one

+
+
+

reportTask.tabular (property)

+

Whether task's resource is tabular

+

Signature

+

bool

+
+ +
+

reportTask.flatten (method)

+

Flatten the report + +Parameters + spec (any[]): flatten specification

+

Signature

+

(spec: List[str] = [rowNumber, fieldNumber, type])

+

Parameters

+
    +
  • + spec + (List[str])
  • +
+
+
+

reportTask.to_summary (method)

+

Generate summary for validation task"

+

Signature

+

() -> str

+
+ + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/framework/resource.html b/docs/framework/resource.html new file mode 100644 index 0000000000..e4ac69be08 --- /dev/null +++ b/docs/framework/resource.html @@ -0,0 +1,4701 @@ + + + + + + + + +Resource Class | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ Edit page in Livemark
+ (2023-07-18 13:48) +
+ +

Resource Class

+

The Resource class is arguably the most important class of the whole Frictionless Framework. It's based on the Data Resource Standard and the Tabular Data Resource Standard.

+

Creating Resource

+

Let's create a data resource:

+ +
+
+
from frictionless import Resource
+
+resource = Resource('table.csv') # from a resource path
+resource = Resource('resource.json') # from a descriptor path
+resource = Resource({'path': 'table.csv'}) # from a descriptor
+resource = Resource(path='table.csv') # from arguments
+
+ +
+

As you can see, it's possible to create a resource from different kinds of sources; the source type is detected automatically (e.g. whether it's a descriptor or a path). It's possible to make this step more explicit:

+ +
+
+
from frictionless import Resource
+
+resource = Resource(path='data/table.csv') # from a path
+resource = Resource('data/resource.json') # from a descriptor
+
+ +
+

Describing Resource

+

The standards support a great deal of resource metadata, which you can also provide with Frictionless Framework:

+ +
+
+
from frictionless import Resource
+
+resource = Resource(
+    name='resource',
+    title='My Resource',
+    description='My Resource for the Guide',
+    path='table.csv',
+    # it's possible to provide all the official properties like mediatype, etc
+)
+print(resource)
+
+ +
{'name': 'resource',
+ 'type': 'table',
+ 'title': 'My Resource',
+ 'description': 'My Resource for the Guide',
+ 'path': 'table.csv',
+ 'scheme': 'file',
+ 'format': 'csv',
+ 'mediatype': 'text/csv'}
+ +
+

If you have created a resource, for example, from a descriptor, you can access these properties:

+ +
+
+
from frictionless import Resource
+
+resource = Resource('resource.json')
+print(resource.name)
+# and others
+
+ +
name
+ +
+

And edit them:

+ +
+
+
from frictionless import Resource
+
+resource = Resource('resource.json')
+resource.name = 'new-name'
+resource.title = 'New Title'
+resource.description = 'New Description'
+# and others
+print(resource)
+
+ +
{'name': 'new-name',
+ 'type': 'table',
+ 'title': 'New Title',
+ 'description': 'New Description',
+ 'path': 'table.csv',
+ 'scheme': 'file',
+ 'format': 'csv',
+ 'mediatype': 'text/csv'}
+ +
+

Saving Descriptor

+

As any of the Metadata classes the Resource class can be saved as JSON or YAML:

+ +
+
+
from frictionless import Resource
+resource = Resource('table.csv')
+resource.to_json('resource.json') # Save as JSON
+resource.to_yaml('resource.yaml') # Save as YAML
+
+ +
+

Resource Lifecycle

+

You might have noticed that we had to duplicate the with Resource(...) statement in some examples. The reason is that Resource is a streaming interface. Once it's read you need to open it again. Let's show it in an example:

+ +
+
+
from pprint import pprint
+from frictionless import Resource
+
+resource = Resource('capital-3.csv')
+resource.open()
+pprint(resource.read_rows())
+pprint(resource.read_rows())
+# We need to re-open: there is no data left
+resource.open()
+pprint(resource.read_rows())
+# We need to close manually: no context manager is used
+resource.close()
+
+ +
[{'id': 1, 'name': 'London'},
+ {'id': 2, 'name': 'Berlin'},
+ {'id': 3, 'name': 'Paris'},
+ {'id': 4, 'name': 'Madrid'},
+ {'id': 5, 'name': 'Rome'}]
+[]
+[{'id': 1, 'name': 'London'},
+ {'id': 2, 'name': 'Berlin'},
+ {'id': 3, 'name': 'Paris'},
+ {'id': 4, 'name': 'Madrid'},
+ {'id': 5, 'name': 'Rome'}]
+ +
+

At the same time, you can read data from a resource without opening and closing it explicitly. In this case, Frictionless Framework will open and close the resource for you, so it is basically a one-time operation:

+ +
+
+
from pprint import pprint
+from frictionless import Resource
+
+resource = Resource('capital-3.csv')
+pprint(resource.read_rows())
+
+ +
[{'id': 1, 'name': 'London'},
+ {'id': 2, 'name': 'Berlin'},
+ {'id': 3, 'name': 'Paris'},
+ {'id': 4, 'name': 'Madrid'},
+ {'id': 5, 'name': 'Rome'}]
+ +
+
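Instead of calling open and close manually, you can let a with block manage the resource lifecycle, as other examples in this guide do:

from pprint import pprint
+from frictionless import Resource
+
+with Resource('capital-3.csv') as resource:
+    pprint(resource.read_rows())  # the resource is closed automatically afterwards
+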

Reading Data

+

The Resource class is also a metadata class which provides various read and stream functions. The extract functions always read rows into memory; Resource can do the same but it also gives a choice regarding output data. It can be rows, data, text, or bytes. Let's try reading all of them:

+ +
+
+
from pprint import pprint
+from frictionless import Resource
+
+resource = Resource('country-3.csv')
+pprint(resource.read_bytes())
+pprint(resource.read_text())
+pprint(resource.read_cells())
+pprint(resource.read_rows())
+
+ +
(b'id,capital_id,name,population\n1,1,Britain,67\n2,3,France,67\n3,2,Germany,8'
+ b'3\n4,5,Italy,60\n5,4,Spain,47\n')
+''
+[['id', 'capital_id', 'name', 'population'],
+ ['1', '1', 'Britain', '67'],
+ ['2', '3', 'France', '67'],
+ ['3', '2', 'Germany', '83'],
+ ['4', '5', 'Italy', '60'],
+ ['5', '4', 'Spain', '47']]
+[{'id': 1, 'capital_id': 1, 'name': 'Britain', 'population': 67},
+ {'id': 2, 'capital_id': 3, 'name': 'France', 'population': 67},
+ {'id': 3, 'capital_id': 2, 'name': 'Germany', 'population': 83},
+ {'id': 4, 'capital_id': 5, 'name': 'Italy', 'population': 60},
+ {'id': 5, 'capital_id': 4, 'name': 'Spain', 'population': 47}]
+ +
+

It's really handy to read all your data into memory but it's not always possible if a file is really big. For such cases, Frictionless provides streaming functions:

+ +
+
+
from pprint import pprint
+from frictionless import Resource
+
+with Resource('country-3.csv') as resource:
+    pprint(resource.byte_stream)
+    pprint(resource.text_stream)
+    pprint(resource.cell_stream)
+    pprint(resource.row_stream)
+    for row in resource.row_stream:
+      print(row)
+
+ +
<frictionless.system.loader.ByteStreamWithStatsHandling object at 0x7f67687a27a0>
+<_io.TextIOWrapper name='country-3.csv' encoding='utf-8'>
+<itertools.chain object at 0x7f67687a2b60>
+<generator object TableResource.__open_row_stream.<locals>.row_stream at 0x7f67686d0a50>
+{'id': 1, 'capital_id': 1, 'name': 'Britain', 'population': 67}
+{'id': 2, 'capital_id': 3, 'name': 'France', 'population': 67}
+{'id': 3, 'capital_id': 2, 'name': 'Germany', 'population': 83}
+{'id': 4, 'capital_id': 5, 'name': 'Italy', 'population': 60}
+{'id': 5, 'capital_id': 4, 'name': 'Spain', 'population': 47}
+ +
+

Indexing Data

+
+ +

Indexing a resource in Frictionless terms means loading a data table into a database. Let's explore how this feature works in different modes.

+
+

All the examples are written for SQLite for simplicity

+
+

Normal Mode

+

This mode is supported for any database that is supported by sqlalchemy. Under the hood, Frictionless will infer the Table Schema and populate the data table as it normally reads data. It means that type errors will be replaced by null values, and, in general, the process is guaranteed to finish successfully for any data, even very invalid data.

+ +
+
+
frictionless index table.csv --database sqlite:///index/project.db --name table
+frictionless extract sqlite:///index/project.db --table table --json
+
+ +
──────────────────────────────────── Index ─────────────────────────────────────
+
+[table] Indexed 3 rows in 0.244 seconds
+──────────────────────────────────── Result ────────────────────────────────────
+Succesefully indexed 1 tables
+{
+  "project": [
+    {
+      "id": 1,
+      "name": "english"
+    },
+    {
+      "id": 2,
+      "name": "中国人"
+    }
+  ]
+}
+ +
+
+
import sqlite3
+from frictionless import Resource, formats
+
+resource = Resource('table.csv')
+resource.index('sqlite:///index/project.db', name='table')
+print(Resource('sqlite:///index/project.db', control=formats.sql.SqlControl(table='table')).extract())
+
+ +
{'project': [{'id': 1, 'name': 'english'}, {'id': 2, 'name': '中国人'}]}
+ +
+

Fast Mode

+
+ +

Fast mode is supported for SQLite and Postgresql databases. It will infer the Table Schema using a data sample and index data using COPY in Postgresql and .import in SQLite. For big data files this mode will be 10-30x faster than normal indexing, but the speed comes at a price: if there is invalid data, the indexing will fail.

+ +
+
+
frictionless index table.csv --database sqlite:///index/project.db --name table --fast
+frictionless extract sqlite:///index/project.db --table table --json
+
+ +
──────────────────────────────────── Index ─────────────────────────────────────
+
+[table] Indexed 30 bytes in 0.248 seconds
+──────────────────────────────────── Result ────────────────────────────────────
+Succesefully indexed 1 tables
+{
+  "project": [
+    {
+      "id": 1,
+      "name": "english"
+    },
+    {
+      "id": 2,
+      "name": "中国人"
+    }
+  ]
+}
+ +
+
+
import sqlite3
+from frictionless import Resource, formats
+
+resource = Resource('table.csv')
+resource.index('sqlite:///index/project.db', name='table', fast=True)
+print(Resource('sqlite:///index/project.db', control=formats.sql.SqlControl(table='table')).extract())
+
+ +
{'project': [{'id': 1, 'name': 'english'}, {'id': 2, 'name': '中国人'}]}
+ +
+

Solution 1: Fallback

+

To ensure that the data will be successfully indexed, it's possible to use the fallback option. If fast indexing fails, Frictionless will start over in normal mode and finish the process successfully.

+ +
+
+
frictionless index table.csv --database sqlite:///index/project.db --name table --fast --fallback
+
+ +
+
+
import sqlite3
+from frictionless import Resource, formats
+
+resource = Resource('table.csv')
+resource.index('sqlite:///index/project.db', name='table', fast=True, fallback=True)
+
+ +
+

Solution 2: QSV

+

Another option is to provide a path to the QSV binary. In this case, the initial schema inference is based on the whole data file, which guarantees that the table is valid type-wise:

+ +
+
+
frictionless index table.csv --database sqlite:///index/project.db --name table --fast --qsv qsv_path
+
+ +
+
+
import sqlite3
+from frictionless import Resource, formats
+
+resource = Resource('table.csv')
+resource.index('sqlite:///index/project.db', name='table', fast=True, qsv_path='qsv_path')
+
+ +
+

Scheme

+

The scheme, also known as the protocol, indicates which loader Frictionless should use to read or write data. It can be file (default), text, http, https, s3, and others.

+ +
+
+
from frictionless import Resource
+
+with Resource(b'header1,header2\nvalue1,value2', format='csv') as resource:
+  print(resource.scheme)
+  print(resource.to_view())
+
+ +
buffer
++----------+----------+
+| header1  | header2  |
++==========+==========+
+| 'value1' | 'value2' |
++----------+----------+
+ +
+

Format

+

The format, also called the extension, helps Frictionless choose a proper parser to handle the file. Popular formats are csv, xlsx, json, and others.

+ +
+
+
from frictionless import Resource
+
+with Resource(b'header1,header2\nvalue1,value2.csv', format='csv') as resource:
+  print(resource.format)
+  print(resource.to_view())
+
+ +
csv
++----------+--------------+
+| header1  | header2      |
++==========+==============+
+| 'value1' | 'value2.csv' |
++----------+--------------+
+ +
+

Encoding

+

Frictionless automatically detects the encoding of files, but sometimes it can be inaccurate. It's possible to provide an encoding manually:

+ +
+
+
from frictionless import Resource
+
+with Resource('country-3.csv', encoding='utf-8') as resource:
+  print(resource.encoding)
+  print(resource.path)
+
+ +
utf-8
+country-3.csv
+ +
+
+

Innerpath

+

By default, Frictionless uses the first file found in a zip archive. It's possible to adjust this behaviour:

+ +
+
+
from frictionless import Resource
+
+with Resource('table-multiple-files.zip', innerpath='table-reverse.csv') as resource:
+  print(resource.compression)
+  print(resource.innerpath)
+  print(resource.to_view())
+
+ +
zip
+table-reverse.csv
++----+-----------+
+| id | name      |
++====+===========+
+|  1 | '中国人'     |
++----+-----------+
+|  2 | 'english' |
++----+-----------+
+ +
+

Compression

+

It's possible to adjust compression detection by providing the algorithm explicitly. For the example below it's not required as it would be detected anyway:

+ +
+
+
from frictionless import Resource
+
+with Resource('table.csv.zip', compression='zip') as resource:
+  print(resource.compression)
+  print(resource.to_view())
+
+ +
zip
++----+-----------+
+| id | name      |
++====+===========+
+|  1 | 'english' |
++----+-----------+
+|  2 | '中国人'     |
++----+-----------+
+ +
+

Dialect

+

Please read Table Dialect Guide for more information.

+

Schema

+

Please read Table Schema Guide for more information.

+

Checklist

+

Please read Checklist Guide for more information.

+

Pipeline

+

Please read Pipeline Guide for more information.

+

Stats

+

Resource's stats can be accessed with resource.stats:

+ +
+
+
from frictionless import Resource
+
+resource = Resource('table.csv')
+resource.infer(stats=True)
+print(resource.stats)
+
+ +
<frictionless.resource.stats.ResourceStats object at 0x7f6767a313f0>
+ +
+
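The stats object exposes the same counters shown in validation reports. A sketch assuming the rows and bytes attributes mirror the 'rows' and 'bytes' stats seen in the report output earlier:

from frictionless import Resource
+
+resource = Resource('table.csv')
+resource.infer(stats=True)
+print(resource.stats.rows)   # assumed attribute, mirroring the 'rows' stat
+print(resource.stats.bytes)  # assumed attribute, mirroring the 'bytes' stat
+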

Reference

+
+ + +
+
+ +

Resource (class)

+ +
+
+ + +
+

Resource (class)

+

Resource representation. + +This class is one of the cornerstones of the Frictionless framework. +It loads a data source, and allows you to stream its parsed contents. +At the same time, it's a metadata class for data description. + +```python +with Resource("data/table.csv") as resource: + resource.header == ["id", "name"] + resource.read_rows() == [ + {'id': 1, 'name': 'english'}, + {'id': 2, 'name': '中国人'}, + ] +```

+

Signature

+

(*, source: Optional[Any] = None, control: Optional[Control] = None, packagify: bool = False, name: Optional[str] = , title: Optional[str] = None, description: Optional[str] = None, homepage: Optional[str] = None, profile: Optional[str] = None, licenses: List[Dict[str, Any]] = NOTHING, sources: List[Dict[str, Any]] = NOTHING, path: Optional[str] = None, data: Optional[Any] = None, scheme: Optional[str] = None, format: Optional[str] = None, datatype: Optional[str] = , mediatype: Optional[str] = None, compression: Optional[str] = None, extrapaths: List[str] = NOTHING, innerpath: Optional[str] = None, encoding: Optional[str] = None, hash: Optional[str] = None, bytes: Optional[int] = None, fields: Optional[int] = None, rows: Optional[int] = None, dialect: Union[Dialect, str] = NOTHING, schema: Union[Schema, str] = NOTHING, basepath: Optional[str] = None, detector: Detector = NOTHING, package: Optional[Package] = None) -> None

+

Parameters

+
    +
  • + source + (Optional[Any])
  • +
  • + control + (Optional[Control])
  • +
  • + packagify + (bool)
  • +
  • + name + (Optional[str])
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + homepage + (Optional[str])
  • +
  • + profile + (Optional[str])
  • +
  • + licenses + (List[Dict[str, Any]])
  • +
  • + sources + (List[Dict[str, Any]])
  • +
  • + path + (Optional[str])
  • +
  • + data + (Optional[Any])
  • +
  • + scheme + (Optional[str])
  • +
  • + format + (Optional[str])
  • +
  • + datatype + (Optional[str])
  • +
  • + mediatype + (Optional[str])
  • +
  • + compression + (Optional[str])
  • +
  • + extrapaths + (List[str])
  • +
  • + innerpath + (Optional[str])
  • +
  • + encoding + (Optional[str])
  • +
  • + hash + (Optional[str])
  • +
  • + bytes + (Optional[int])
  • +
  • + fields + (Optional[int])
  • +
  • + rows + (Optional[int])
  • +
  • + dialect + (Union[Dialect, str])
  • +
  • + schema + (Union[Schema, str])
  • +
  • + basepath + (Optional[str])
  • +
  • + detector + (Detector)
  • +
  • + package + (Optional[Package])
  • +
+
+ +
+

resource.source (property)

+

+ # TODO: add docs +

+

Signature

+

Optional[Any]

+
+
+

resource.control (property)

+

+ # TODO: add docs +

+

Signature

+

Optional[Control]

+
+
+

resource.packagify (property)

+

+ # TODO: add docs +

+

Signature

+

bool

+
+
+

resource._name (property)

+

+ Resource name according to the specs. + It should be a slugified name of the resource. +

+

Signature

+

Optional[str]

+
+
+

resource.type (property)

+

+ Type of the resource +

+

Signature

+

ClassVar[str]

+
+
+

resource.title (property)

+

+ Resource title according to the specs. + It should be a human-oriented title of the resource. +

+

Signature

+

Optional[str]

+
+
+

resource.description (property)

+

+ Resource description according to the specs. + It should be a human-oriented description of the resource. +

+

Signature

+

Optional[str]

+
+
+

resource.homepage (property)

+

+ A URL for the home on the web that is related to this package. + For example, github repository or ckan dataset address. +

+

Signature

+

Optional[str]

+
+
+

resource.profile (property)

+

+ A fully-qualified URL that points directly to a JSON Schema + that can be used to validate the descriptor +

+

Signature

+

Optional[str]

+
+
+

resource.licenses (property)

+

+ The license(s) under which the resource is provided. + If omitted it's considered the same as the package's licenses. +

+

Signature

+

List[Dict[str, Any]]

+
+
+

resource.sources (property)

+

+ The raw sources for this data resource. + It MUST be an array of Source objects. + Each Source object MUST have a title and + MAY have path and/or email properties. +

+

Signature

+

List[Dict[str, Any]]

+
+
+

resource.path (property)

+

+ Path to data source +

+

Signature

+

Optional[str]

+
+
+

resource.data (property)

+

+ Inline data source +

+

Signature

+

Optional[Any]

+
+
+

resource.scheme (property)

+

+ Scheme for loading the file (file, http, ...). + If not set, it'll be inferred from `source`. +

+

Signature

+

Optional[str]

+
+
+

resource.format (property)

+

+ File source's format (csv, xls, ...). + If not set, it'll be inferred from `source`. +

+

Signature

+

Optional[str]

+
+
+

resource._datatype (property)

+

+ Frictionless Framework specific data type as "table" or "schema" +

+

Signature

+

Optional[str]

+
+
+

resource.mediatype (property)

+

+ Mediatype/mimetype of the resource e.g. “text/csv”, + or “application/vnd.ms-excel”. Mediatypes are maintained by the + Internet Assigned Numbers Authority (IANA) in a media type registry. +

+

Signature

+

Optional[str]

+
+
+

resource.compression (property)

+

+ Source file compression (zip, ...). + If not set, it'll be inferred from `source`. +

+

Signature

+

Optional[str]

+
+
+

resource.extrapaths (property)

+

+ List of paths to concatenate to the main path. + It's used for multipart resources. +

+

Signature

+

List[str]

+
+
+

resource.innerpath (property)

+

+ Path within the compressed file. + It defaults to the first file in the archive (if the source is an archive). +

+

Signature

+

Optional[str]

+
+
+

resource.encoding (property)

+

+ Source encoding. + If not set, it'll be inferred from `source`. +

+

Signature

+

Optional[str]

+
+
+

resource.hash (property)

+

+ # TODO: add docs +

+

Signature

+

Optional[str]

+
+
+

resource.bytes (property)

+

+ # TODO: add docs +

+

Signature

+

Optional[int]

+
+
+

resource.fields (property)

+

+ # TODO: add docs +

+

Signature

+

Optional[int]

+
+
+

resource.rows (property)

+

+ # TODO: add docs +

+

Signature

+

Optional[int]

+
+
+

resource._dialect (property)

+

+ # TODO: add docs +

+

Signature

+

Union[Dialect, str]

+
+
+

resource._schema (property)

+

+ # TODO: add docs +

+

Signature

+

Union[Schema, str]

+
+
+

resource._basepath (property)

+

+ # TODO: add docs +

+

Signature

+

Optional[str]

+
+
+

resource.detector (property)

+

+ File/table detector. + For more information, please check the Detector documentation. +

+

Signature

+

Detector

+
+
+

resource.package (property)

+

+ Parental to this resource package. + For more information, please check the Package documentation. +

+

Signature

+

Optional[Package]

+
+
+

resource.stats (property)

+

+ # TODO: add docs +

+

Signature

+

ResourceStats

+
+
+

resource.tabular (property)

+

+ Whether the resource is tabular +

+

Signature

+

ClassVar[bool]

+
+ +
+

resource.basepath (property)

+

A basepath of the resource + +The normpath of the resource is `basepath` joined with `path`

+

Signature

+

Optional[str]

+
+
+

resource.buffer (property)

+

File's bytes used as a sample + +These buffer bytes are used to infer characteristics of the +source file (e.g. encoding, ...).

+

Signature

+

types.IBuffer

+
+
+

resource.byte_stream (property)

+

Byte stream in form of a generator

+

Signature

+

types.IByteStream

+
+
+

resource.closed (property)

+

Whether the table is closed

+

Signature

+

bool

+
+
+

resource.memory (property)

+

Whether resource is not path based

+

Signature

+

bool

+
+
+

resource.multipart (property)

+

Whether resource is multipart

+

Signature

+

bool

+
+
+

resource.normpath (property)

+

Normalized path of the resource or raise if not set

+

Signature

+

Optional[str]

+
+
+

resource.normpaths (property)

+

Normalized paths of the resource

+

Signature

+

List[str]

+
+
+

resource.paths (property)

+

All paths of the resource

+

Signature

+

List[str]

+
+
+

resource.place (property)

+

Stringified resource location

+

Signature

+

str

+
+
+

resource.remote (property)

+

Whether resource is remote

+

Signature

+

bool

+
+
+

resource.text_stream (property)

+

Text stream in form of a generator

+

Signature

+

types.ITextStream

+
+ +
+

resource.close (method)

+

Close the resource as "filelike.close" does

+

Signature

+

() -> None

+
+
+

resource.dereference (method)

+

Dereference underlying metadata + +If some of the underlying metadata is provided as a string +it will be replaced by the metadata object

+
+
+

Resource.describe (method) (static)

+

Describe the given source as a resource

+

Signature

+

(source: Optional[Any] = None, *, name: Optional[str] = None, type: Optional[str] = None, stats: bool = False, **options: Any) -> Metadata

+

Parameters

+
    +
  • + source + (Optional[Any]): data source
  • +
  • + name + (Optional[str]): resource name
  • +
  • + type + (Optional[str]): data type: "package", "resource", "dialect", or "schema"
  • +
  • + stats + (bool): if `True` infer resource's stats
  • +
  • + options + (Any)
  • +
+
+
+

resource.infer (method)

+

Infer metadata

+

Signature

+

(*, stats: bool = False) -> None

+

Parameters

+
    +
  • + stats + (bool): stream file completely and infer stats
  • +
+
+
+

resource.list (method)

+

List dataset resources

+

Signature

+

(*, name: Optional[str] = None) -> List[Resource]

+

Parameters

+
    +
  • + name + (Optional[str]): limit to one resource (if applicable)
  • +
+
+
+

resource.open (method)

+

Open the resource as "io.open" does

+
+
+

resource.read_bytes (method)

+

Read bytes into memory

+

Signature

+

(*, size: Optional[int] = None) -> bytes

+

Parameters

+
    +
  • + size + (Optional[int])
  • +
+
+
+

resource.read_data (method)

+

Read data into memory

+

Signature

+

(*, size: Optional[int] = None) -> Any

+

Parameters

+
    +
  • + size + (Optional[int])
  • +
+
+
+

resource.read_text (method)

+

Read text into memory

+

Signature

+

(*, size: Optional[int] = None) -> str

+

Parameters

+
    +
  • + size + (Optional[int])
  • +
+
+
+

resource.to_copy (method)

+

Create a copy from the resource

+

Signature

+

(**options: Any) -> Self

+

Parameters

+
    +
  • + options + (Any)
  • +
+
+
+

resource.validate (method)

+

Validate resource

+

Signature

+

(checklist: Optional[Checklist] = None, *, name: Optional[str] = None, on_row: Optional[types.ICallbackFunction] = None, parallel: bool = False, limit_rows: Optional[int] = None, limit_errors: int = 1000) -> Report

+

Parameters

+
    +
  • + checklist + (Optional[Checklist]): a Checklist object
  • +
  • + name + (Optional[str]): limit validation to one resource (if applicable)
  • +
  • + on_row + (Optional[types.ICallbackFunction]): callback for every row
  • +
  • + parallel + (bool)
  • +
  • + limit_rows + (Optional[int]): limit amount of rows to this number
  • +
  • + limit_errors + (int): limit amount of errors to this number
  • +
+
+ + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/framework/schema.html b/docs/framework/schema.html new file mode 100644 index 0000000000..d6311e0922 --- /dev/null +++ b/docs/framework/schema.html @@ -0,0 +1,4256 @@ + + + + + + + + +Schema Class | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ Edit page in Livemark
+ (2023-07-18 13:48) +
+ +

Schema Class

+

The Table Schema is a core Frictionless Data concept: metadata describing a tabular data source. You can read the Table Schema Standard for more information.

+

Creating Schema

+

Let's create a table schema:

+ +
+
+
from frictionless import Schema, fields, describe
+
+schema = describe('table.csv', type='schema') # from a resource path
+schema = Schema.from_descriptor('schema.json') # from a descriptor path
+schema = Schema.from_descriptor({'fields': [{'name': 'id', 'type': 'integer'}]}) # from a descriptor
+
+ +
+

As you can see, it's possible to create a schema from different kinds of sources; the source type is detected automatically (e.g. whether it's a dict or a path). It's possible to make this step more explicit:

+ +
+
+
from frictionless import Schema, Field
+
+schema = Schema(fields=[fields.StringField(name='id')]) # from fields
+schema = Schema.from_descriptor('schema.json') # from a descriptor
+
+ +
+

Describing Schema

+

The standard supports some additional schema metadata:

+ +
+
+
from frictionless import Schema, fields
+
+schema = Schema(
+    fields=[fields.StringField(name='id')],
+    missing_values=['na'],
+    primary_key=['id'],
+    # foreign_keys
+)
+print(schema)
+
+ +
{'fields': [{'name': 'id', 'type': 'string'}],
+ 'missingValues': ['na'],
+ 'primaryKey': ['id']}
+ +
+

If you have created a schema, for example, from a descriptor, you can access these properties:

+ +
+
+
from frictionless import Schema
+
+schema = Schema.from_descriptor('schema.json')
+print(schema.missing_values)
+# and others
+
+ +
['']
+ +
+

And edit them:

+ +
+
+
from frictionless import Schema
+
+schema = Schema.from_descriptor('schema.json')
+schema.missing_values.append('-')
+# and others
+print(schema)
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'}],
+ 'missingValues': ['', '-']}
+ +
+

Field Management

+

The Schema class provides useful methods to manage fields:

+ +
+
+
from frictionless import Schema, fields
+
+schema = Schema.from_descriptor('schema.json')
+print(schema.fields)
+print(schema.field_names)
+schema.add_field(fields.StringField(name='new-name'))
+field = schema.get_field('new-name')
+print(schema.has_field('new-name'))
+schema.remove_field('new-name')
+
+ +
[{'name': 'id', 'type': 'integer'}, {'name': 'name', 'type': 'string'}]
+['id', 'name']
+True
+ +
+

Saving Descriptor

+

As any of the Metadata classes the Schema class can be saved as JSON or YAML:

+ +
+
+
from frictionless import Schema, fields
+schema = Schema(fields=[fields.IntegerField(name='id')])
+schema.to_json('schema.json') # Save as JSON
+schema.to_yaml('schema.yaml') # Save as YAML
+
+ +
+

Reading Cells

+

During the process of data reading, a resource uses a schema to convert data:

+ +
+
+
from frictionless import Schema, fields
+
+schema = Schema(fields=[fields.IntegerField(name='integer'), fields.StringField(name='string')])
+cells, notes = schema.read_cells(['3', 'value'])
+print(cells)
+
+ +
[3, 'value']
+ +
+

Writing Cells

+

During the process of data writing, a resource uses a schema to convert data:

+ +
+
+
from frictionless import Schema, fields
+
+schema = Schema(fields=[fields.IntegerField(name='integer'), fields.StringField(name='string')])
+cells, notes = schema.write_cells([3, 'value'])
+print(cells)
+
+ +
[3, 'value']
+ +
+

Creating Field

+

Let's create a field:

+ +
+
+
from frictionless import fields
+
+field = fields.IntegerField(name='name')
+print(field)
+
+ +
{'name': 'name', 'type': 'integer'}
+ +
+

Usually, we work with fields that were already created by a schema:

+ +
+
+
from frictionless import describe
+
+resource = describe('table.csv')
+field = resource.schema.get_field('id')
+print(field)
+
+ +
{'name': 'id', 'type': 'integer'}
+ +
+

Field Types

+

Frictionless Framework supports all the Table Schema Standard field types along with an ability to create custom types.

+

For some types there are additional properties available:

+ +
+
+
from frictionless import describe
+
+resource = describe('table.csv')
+field = resource.schema.get_field('id') # it's an integer
+print(field.bare_number)
+
+ +
True
+ +
+

See the complete reference at Tabular Fields.

+

Reading Cell

+

During the process of data reading, a schema uses a field internally. If needed, a user can convert their data using this interface:

+ +
+
+
from frictionless import fields
+
+field = fields.IntegerField(name='name')
+cell, note = field.read_cell('3')
+print(cell)
+
+ +
3
+ +
+
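When a cell can't be cast to the field type, read_cell returns None for the cell along with a note describing the problem; a minimal sketch (the exact note wording may differ):

from frictionless import fields
+
+field = fields.IntegerField(name='name')
+cell, note = field.read_cell('bad')
+print(cell)  # None: the value is not castable to integer
+print(note)  # a note explaining the type error
+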

Writing Cell

+

During the process of data writing, a schema uses a field internally. The same as with reading, a user can convert their data using this interface:

+ +
+
+
from frictionless import fields
+
+field = fields.IntegerField(name='name')
+cell, note = field.write_cell(3)
+print(cell)
+
+ +
3
+ +
+

Reference

+
+ + +
+
+ +

Schema (class)

+

Field (class)

+ +
+
+ + +
+

Schema (class)

+

Schema representation + +This class is one of the cornerstones of the Frictionless framework. +It allows working with Table Schema and its fields. + +```python +schema = Schema.from_descriptor('schema.json') +schema.add_field(fields.StringField(name='name')) +```

+

Signature

+

(*, descriptor: Optional[Union[types.IDescriptor, str]] = None, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, fields: List[Field] = NOTHING, missing_values: List[str] = NOTHING, primary_key: List[str] = NOTHING, foreign_keys: List[Dict[str, Any]] = NOTHING) -> None

+

Parameters

+
    +
  • + descriptor + (Optional[Union[types.IDescriptor, str]])
  • +
  • + name + (Optional[str])
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + fields + (List[Field])
  • +
  • + missing_values + (List[str])
  • +
  • + primary_key + (List[str])
  • +
  • + foreign_keys + (List[Dict[str, Any]])
  • +
+
+ +
+

schema.descriptor (property)

+

+ # TODO: add docs +

+

Signature

+

Optional[Union[types.IDescriptor, str]]

+
+
+

schema.name (property)

+

+ A short url-usable (and preferably human-readable) name. + This MUST be lower-case and contain only alphanumeric characters + along with “_” or “-” characters. +

+

Signature

+

Optional[str]

+
+
+

schema.type (property)

+

+ Type of the object +

+

Signature

+

ClassVar[Union[str, None]]

+
+
+

schema.title (property)

+

+ A human-oriented title for the Schema. +

+

Signature

+

Optional[str]

+
+
+

schema.description (property)

+

+ A brief description of the Schema. +

+

Signature

+

Optional[str]

+
+
+

schema.fields (property)

+

+ A List of fields in the schema. +

+

Signature

+

List[Field]

+
+
+

schema.missing_values (property)

+

+ List of string values to be set as missing values in the schema fields. If any string from + the missing values is found in any of the field values, then it is set to None. +

+

Signature

+

List[str]

+
+
+

schema.primary_key (property)

+

+ Specifies primary key for the schema. +

+

Signature

+

List[str]

+
+
+

schema.foreign_keys (property)

+

+ Specifies the foreign keys for the schema. +

+

Signature

+

List[Dict[str, Any]]

+
+ +
+

schema.field_names (property)

+

List of field names

+

Signature

+

List[str]

+
+
+

schema.field_types (property)

+

List of field types

+

Signature

+

List[str]

+
+ +
+

schema.add_field (method)

+

Add new field to the schema

+

Signature

+

(field: Field, *, position: Optional[int] = None) -> None

+

Parameters

+
    +
  • + field + (Field)
  • +
  • + position + (Optional[int])
  • +
+
+
+

schema.clear_fields (method)

+

Remove all the fields

+

Signature

+

() -> None

+
+
+

Schema.describe (method) (static)

+

Describe the given source as a schema

+

Signature

+

(source: Optional[Any] = None, **options: Any) -> Schema

+

Parameters

+
    +
  • + source + (Optional[Any]): data source
  • +
  • + options + (Any)
  • +
+
+
+

schema.flatten (method)

+

Flatten the schema + +Parameters + spec (str[]): flatten specification

+

Signature

+

(spec: List[str] = [name, type])

+

Parameters

+
    +
  • + spec + (List[str])
  • +
+
+
+

Schema.from_jsonschema (method) (static)

+

Create a Schema from JSONSchema profile

+

Signature

+

(profile: Union[types.IDescriptor, str]) -> Schema

+

Parameters

+
    +
  • + profile + (Union[types.IDescriptor, str]): path or dict with JSONSchema profile
  • +
+
+
+

schema.get_field (method)

+

Get field by name

+

Signature

+

(name: str) -> Field

+

Parameters

+
    +
  • + name + (str)
  • +
+
+
+

schema.has_field (method)

+

Check if a field is present

+

Signature

+

(name: str) -> bool

+

Parameters

+
    +
  • + name + (str)
  • +
+
+
+

schema.read_cells (method)

+

Read a list of cells (normalize/cast)

+

Signature

+

(cells: List[Any])

+

Parameters

+
    +
  • + cells + (List[Any]): list of cells
  • +
+
+
+

schema.remove_field (method)

+

Remove field by name

+

Signature

+

(name: str) -> Field

+

Parameters

+
    +
  • + name + (str)
  • +
+
+
+

schema.set_field (method)

+

Set field by name

+

Signature

+

(field: Field) -> Optional[Field]

+

Parameters

+
    +
  • + field + (Field)
  • +
+
+
+

schema.set_field_type (method)

+

Set field type

+

Signature

+

(name: str, type: str) -> Field

+

Parameters

+
    +
  • + name + (str)
  • +
  • + type + (str)
  • +
+
+
+

schema.to_excel_template (method)

+

Export schema as an excel template

+

Signature

+

(path: str) -> None

+

Parameters

+
    +
  • + path + (str): path of excel file to create with ".xlsx" extension
  • +
+
+
+

schema.to_summary (method)

+

Summary of the schema in table format

+

Signature

+

() -> str

+
+
+

schema.update_field (method)

+

Update field

+

Signature

+

(name: str, descriptor: types.IDescriptor) -> Field

+

Parameters

+
    +
  • + name + (str)
  • +
  • + descriptor + (types.IDescriptor)
  • +
+
+
+

schema.write_cells (method)

+

Write a list of cells (normalize/uncast)

+

Signature

+

(cells: List[Any], *, types: List[str] = [])

+

Parameters

+
    +
  • + cells + (List[Any]): list of cells
  • +
  • + types + (List[str])
  • +
+
+ + +
+

Field (class)

+

Field representation

+

Signature

+

(*, name: str, title: Optional[str] = None, description: Optional[str] = None, format: str = default, missing_values: List[str] = NOTHING, constraints: Dict[str, Any] = NOTHING, rdf_type: Optional[str] = None, example: Optional[str] = None, schema: Optional[Schema] = None) -> None

+

Parameters

+
    +
  • + name + (str)
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + format + (str)
  • +
  • + missing_values + (List[str])
  • +
  • + constraints + (Dict[str, Any])
  • +
  • + rdf_type + (Optional[str])
  • +
  • + example + (Optional[str])
  • +
  • + schema + (Optional[Schema])
  • +
+
+ +
+

field.name (property)

+

+ A short url-usable (and preferably human-readable) name. + This MUST be lower-case and contain only alphanumeric characters + along with “_” or “-” characters. +

+

Signature

+

str

+
+
+

field.type (property)

+

+ Type of the field such as "boolean", "integer" etc. +

+

Signature

+

ClassVar[str]

+
+
+

field.title (property)

+

+ A human-oriented title for the Field. +

+

Signature

+

Optional[str]

+
+
+

field.description (property)

+

+ A brief description of the Field. +

+

Signature

+

Optional[str]

+
+
+

field.format (property)

+

+ Format of the field to specify different value readers for the field type. + For example: "default","array" etc. +

+

Signature

+

str

+
+
+

field.missing_values (property)

+

+ List of string values to be set as missing values in the field. If any string from the missing values + is found in the field value, then it is set to None. +

+

Signature

+

List[str]

+
+
+

field.constraints (property)

+

+ A dictionary with rules that constrain the data values permitted for the field. +

+

Signature

+

Dict[str, Any]

+
+
+

field.rdf_type (property)

+

+ RDF type. Indicates whether the field is of RDF type. +

+

Signature

+

Optional[str]

+
+
+

field.example (property)

+

+ An example of a value for the field. +

+

Signature

+

Optional[str]

+
+
+

field.schema (property)

+

+ Schema class of which the field is part of. +

+

Signature

+

Optional[Schema]

+
+
+

field.builtin (property)

+

+ Specifies if the field is a builtin feature. +

+

Signature

+

ClassVar[bool]

+
+
+

field.supported_constraints (property)

+

+ List of supported constraints for a field. +

+

Signature

+

ClassVar[List[str]]

+
+ +
+

field.required (property)

+

Indicates if the field is mandatory.

+

Signature

+

bool

+
+ + + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/framework/table.html b/docs/framework/table.html new file mode 100644 index 0000000000..b0f0735b4f --- /dev/null +++ b/docs/framework/table.html @@ -0,0 +1,3749 @@ + + + + + + + + +Table Classes | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ Edit page in Livemark
+ (2023-07-18 13:48) +
+ +

Table Classes

+

Table Header

+

After opening a resource, you get access to a resource.header object, which describes the resource in more detail. This is a list of normalized labels but also provides some additional functionality. Let's take a look:

+ +
+
+
from frictionless import Resource
+
+with Resource('capital-3.csv') as resource:
+  print(f'Header: {resource.header}')
+  print(f'Labels: {resource.header.labels}')
+  print(f'Fields: {resource.header.fields}')
+  print(f'Field Names: {resource.header.field_names}')
+  print(f'Field Numbers: {resource.header.field_numbers}')
+  print(f'Errors: {resource.header.errors}')
+  print(f'Valid: {resource.header.valid}')
+  print(f'As List: {resource.header.to_list()}')
+
+ +
Header: ['id', 'name']
+Labels: ['id', 'name']
+Fields: [{'name': 'id', 'type': 'integer'}, {'name': 'name', 'type': 'string'}]
+Field Names: ['id', 'name']
+Field Numbers: [1, 2]
+Errors: []
+Valid: True
+As List: ['id', 'name']
+ +
+

The example above shows a case when a header is valid. For a header that contains errors in its tabular structure, this information can be very useful, revealing discrepancies, duplicates or missing cell information:

+ +
+
+
from pprint import pprint
+from frictionless import Resource
+
+with Resource([['name', 'name'], ['value', 'value']]) as resource:
+    pprint(resource.header.errors)
+
+ +
[{'type': 'duplicate-label',
+ 'title': 'Duplicate Label',
+ 'description': 'Two columns in the header row have the same value. Column '
+                'names should be unique.',
+ 'message': 'Label "name" in the header at position "2" is duplicated to a '
+            'label: at position "1"',
+ 'tags': ['#table', '#header', '#label'],
+ 'note': 'at position "1"',
+ 'labels': ['name', 'name'],
+ 'rowNumbers': [1],
+ 'label': 'name',
+ 'fieldName': 'name2',
+ 'fieldNumber': 2}]
+ +
+

Table Row

+

The extract, resource.read_rows() and other functions return or yield row objects. In Python, this returns a dictionary with the following information. Note: this example uses the Detector object, which tweaks how different aspects of metadata are detected.

+ +
+
+
from frictionless import Resource, Detector
+
+detector = Detector(schema_patch={'missingValues': ['1']})
+with Resource('capital-3.csv', detector=detector) as resource:
+  for row in resource.row_stream:
+    print(f'Row: {row}')
+    print(f'Cells: {row.cells}')
+    print(f'Fields: {row.fields}')
+    print(f'Field Names: {row.field_names}')
+    print(f'Value of field "name": {row["name"]}') # accessed as a dict
+    print(f'Row Number: {row.row_number}') # counted row number starting from 1
+    print(f'Blank Cells: {row.blank_cells}')
+    print(f'Error Cells: {row.error_cells}')
+    print(f'Errors: {row.errors}')
+    print(f'Valid: {row.valid}')
+    print(f'As Dict: {row.to_dict(json=False)}')
+    print(f'As List: {row.to_list(json=True)}') # JSON compatible data types
+    break
+
+ +
Row: {'id': None, 'name': 'London'}
+Cells: ['1', 'London']
+Fields: [{'name': 'id', 'type': 'integer'}, {'name': 'name', 'type': 'string'}]
+Field Names: ['id', 'name']
+Value of field "name": London
+Row Number: 2
+Blank Cells: {'id': '1'}
+Error Cells: {}
+Errors: []
+Valid: True
+As Dict: {'id': None, 'name': 'London'}
+As List: [None, 'London']
+ +
+

As we can see, this output provides a lot of information which is especially useful when a row is not valid. Our row is valid but we demonstrated how it can preserve data about missing values. It also preserves data about all cells that contain errors:

+ +
+
+
from pprint import pprint
+from frictionless import Resource
+
+with Resource([['name'], ['value', 'value']]) as resource:
+    for row in resource.row_stream:
+        pprint(row.errors)
+
+ +
[{'type': 'extra-cell',
+ 'title': 'Extra Cell',
+ 'description': 'This row has more values compared to the header row (the '
+                'first row in the data source). A key concept is that all the '
+                'rows in tabular data must have the same number of columns.',
+ 'message': 'Row at position "2" has an extra value in field at position "2"',
+ 'tags': ['#table', '#row', '#cell'],
+ 'note': '',
+ 'cells': ['value', 'value'],
+ 'rowNumber': 2,
+ 'cell': 'value',
+ 'fieldName': '',
+ 'fieldNumber': 2}]
+ +
+

Reference


Header (class)

+

Header representation

> Constructor of this object is not Public API

+

Signature

+

(labels: List[str], *, fields: List[Field], row_numbers: List[int], ignore_case: bool = False)

+

Parameters

+
  • labels (List[str]): header row labels
  • fields (List[Field]): table fields
  • row_numbers (List[int]): row numbers
  • ignore_case (bool): ignore case
+
+ + + +
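Since the constructor is not part of the public API, a Header is normally obtained from an open resource. A minimal sketch, reusing the capital-3.csv file from above:

from frictionless import Resource
+
+with Resource('capital-3.csv') as resource:
+    header = resource.header  # created by the framework, not constructed directly
+    print(header.to_list())
+    print(header.valid)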
+

header.to_list (method)

+

Convert to a list

+
+
+

header.to_str (method)

+

+
+ + +
+

Row (class)

+

Row representation

> Constructor of this object is not Public API

This object is returned by `extract`, `resource.read_rows`, and other functions.

```python
from frictionless import extract

rows = extract("data/table.csv")
for row in rows:
    pass  # work with the Row
```

+

Signature

+

(cells: List[Any], *, field_info: Dict[str, Any], row_number: int)

+

Parameters

+
  • cells (List[Any]): array of cells
  • field_info (Dict[str, Any]): special field info structure
  • row_number (int): row number from 1
+
+ + + +
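Like Header, Row objects are produced by the framework's reading functions rather than constructed directly. For example, a sketch reusing capital-3.csv:

from frictionless import Resource
+
+rows = Resource('capital-3.csv').read_rows()
+print(rows[0].to_dict())  # {'id': 1, 'name': 'London'}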
+

row.to_dict (method)

+

+

Signature

+

(*, csv: bool = False, json: bool = False, types: Optional[List[str]] = None) -> Dict[str, Any]

+

Parameters

+
  • csv (bool)
  • json (bool): make data types compatible with JSON format
  • types (Optional[List[str]])
+
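For instance, a small sketch contrasting native and JSON-compatible output (reusing capital-3.csv; with this table both calls print the same values, but types with no JSON equivalent would be converted by json=True):

from frictionless import Resource
+
+row = Resource('capital-3.csv').read_rows()[0]
+print(row.to_dict())           # native Python types
+print(row.to_dict(json=True))  # JSON-compatible types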
+
+

row.to_list (method)

+

+

Signature

+

(*, json: bool = False, types: Optional[List[str]] = None)

+

Parameters

+
  • json (bool): make data types compatible with JSON format
  • types (Optional[List[str]]): list of supported types
+
+
+

row.to_str (method)

+

+

Signature

+

(**options: Any)

+

Parameters

+
  • options (Any)
+
\ No newline at end of file
diff --git a/docs/getting-started.html b/docs/getting-started.html
new file mode 100644
index 0000000000..d118a8ae7a
--- /dev/null
+++ b/docs/getting-started.html
@@ -0,0 +1,3783 @@

Getting Started

+

Let's get started with Frictionless! We will learn how to install and use the framework. The simple example below will showcase the framework's basic functionality.

+

Installation

+
+

The framework requires Python 3.8+. Versioning follows the SemVer Standard.

+
+ +
+
+
pip install frictionless
+pip install frictionless[sql] # to install a core plugin (optional)
+pip install 'frictionless[sql]' # for zsh shell
+
+ +
+

The framework supports CSV, Excel, and JSON formats by default. The second command above installs a plugin for SQL support. There are plugins for SQL, Pandas, HTML, and others (all supported formats are listed in the "File Formats" menu, and schemes in the "File Schemes" menu). Usually, you don't need to think about this in advance: Frictionless will display a useful error message about a missing plugin, along with installation instructions.

+

Troubleshooting

+

Did you have an error installing Frictionless? Here are some dependencies and common errors:

+ +

Still having a problem? Ask us for help on our Discord chat or open an issue. We're happy to help!

+

Usage

+

The framework can be used as a command-line interface, as a Python library, and as a web API.

+ +

For instance, all the examples below do the same thing:

+ +
+
+
frictionless extract data/table.csv
+
+ +
+
+
from frictionless import extract
+
+rows = extract('data/table.csv')
+
+ +
+
+
[POST] /extract {"path": "data/table.csv"}
+
+ +
+

All these interfaces are as much alike as possible regarding naming conventions and the way you interact with them. Usually, it's straightforward to translate, for instance, Python code to a command-line call. Frictionless provides code completion for Python and the command-line, which should help to get useful hints in real time. You can find the API reference at the bottom of the respective page, for example: Schema API Reference.

+

Arguments conform to the following naming convention: options in the command line use dashes (e.g. --header-rows), while the corresponding Python arguments use underscores (e.g. header_rows).

+ +

To get the documentation for the command-line interface, just use the --help flag:

+ +
+
+
frictionless --help
+frictionless describe --help
+frictionless extract --help
+frictionless validate --help
+frictionless transform --help
+
+ +
+

Example

+
+

Download invalid.csv to reproduce the examples (right-click and "Save link as"). For more examples, please take a look at the Basic Examples article.

+
+

We will take a very messy data file:

+ +
+
+
cat invalid.csv
+
+ +
id,name,,name
+1,english
+1,english
+
+2,german,1,2,3
+ +
+
+
with open('invalid.csv') as file:
+    print(file.read())
+
+ +
id,name,,name
+1,english
+1,english
+
+2,german,1,2,3
+ +
+

First of all, let's use describe to infer the metadata directly from the tabular data. We can then edit and save it to provide others with useful information about the data:

+
+

The CLI output is in YAML; it is the default Frictionless output format.

+
+ +
+
+
frictionless describe invalid.csv
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+             dataset
+┏━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━┓
+┃ name    ┃ type  ┃ path        ┃
+┡━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━┩
+│ invalid │ table │ invalid.csv │
+└─────────┴───────┴─────────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+                invalid
+┏━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┓
+┃ id      ┃ name   ┃ field3  ┃ name2   ┃
+┡━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━┩
+│ integer │ string │ integer │ integer │
+└─────────┴────────┴─────────┴─────────┘
+ +
+
+
from pprint import pprint
+from frictionless import describe
+
+resource = describe('invalid.csv')
+pprint(resource)
+
+ +
{'name': 'invalid',
+ 'type': 'table',
+ 'path': 'invalid.csv',
+ 'scheme': 'file',
+ 'format': 'csv',
+ 'mediatype': 'text/csv',
+ 'encoding': 'utf-8',
+ 'schema': {'fields': [{'name': 'id', 'type': 'integer'},
+                       {'name': 'name', 'type': 'string'},
+                       {'name': 'field3', 'type': 'integer'},
+                       {'name': 'name2', 'type': 'integer'}]}}
+ +
+

Now that we have inferred a table schema from the data file (e.g., expected format of the table, expected type of each value in a column, etc.), we can use extract to read the normalized tabular data from the source CSV file:

+ +
+
+
frictionless extract invalid.csv
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+             dataset
+┏━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━┓
+┃ name    ┃ type  ┃ path        ┃
+┡━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━┩
+│ invalid │ table │ invalid.csv │
+└─────────┴───────┴─────────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+              invalid
+┏━━━━━━┳━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓
+┃ id   ┃ name    ┃ field3 ┃ name2 ┃
+┡━━━━━━╇━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩
+│ 1    │ english │ None   │ None  │
+│ 1    │ english │ None   │ None  │
+│ None │ None    │ None   │ None  │
+│ 2    │ german  │ 1      │ 2     │
+└──────┴─────────┴────────┴───────┘
+ +
+
+
from pprint import pprint
+from frictionless import extract
+
+rows = extract('invalid.csv')
+pprint(rows)
+
+ +
{'invalid': [{'field3': None, 'id': 1, 'name': 'english', 'name2': None},
+             {'field3': None, 'id': 1, 'name': 'english', 'name2': None},
+             {'field3': None, 'id': None, 'name': None, 'name2': None},
+             {'field3': 1, 'id': 2, 'name': 'german', 'name2': 2}]}
+ +
+

Last but not least, let's get a validation report. This report will help us to identify and fix all the errors present in the tabular data, as comprehensive information is provided for every problem:

+ +
+
+
frictionless validate invalid.csv
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+                  dataset
+┏━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┓
+┃ name    ┃ type  ┃ path        ┃ status  ┃
+┡━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━┩
+│ invalid │ table │ invalid.csv │ INVALID │
+└─────────┴───────┴─────────────┴─────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+                                    invalid
+┏━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
+┃ Row  ┃ Field ┃ Type            ┃ Message                                     ┃
+┡━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
+│ None │ 3     │ blank-label     │ Label in the header in field at position    │
+│      │       │                 │ "3" is blank                                │
+│ None │ 4     │ duplicate-label │ Label "name" in the header at position "4"  │
+│      │       │                 │ is duplicated to a label: at position "2"   │
+│ 2    │ 3     │ missing-cell    │ Row at position "2" has a missing cell in   │
+│      │       │                 │ field "field3" at position "3"              │
+│ 2    │ 4     │ missing-cell    │ Row at position "2" has a missing cell in   │
+│      │       │                 │ field "name2" at position "4"               │
+│ 3    │ 3     │ missing-cell    │ Row at position "3" has a missing cell in   │
+│      │       │                 │ field "field3" at position "3"              │
+│ 3    │ 4     │ missing-cell    │ Row at position "3" has a missing cell in   │
+│      │       │                 │ field "name2" at position "4"               │
+│ 4    │ None  │ blank-row       │ Row at position "4" is completely blank     │
+│ 5    │ 5     │ extra-cell      │ Row at position "5" has an extra value in   │
+│      │       │                 │ field at position "5"                       │
+└──────┴───────┴─────────────────┴─────────────────────────────────────────────┘
+ +
+
+
from pprint import pprint
+from frictionless import validate
+
+report = validate('invalid.csv')
+pprint(report.flatten(["rowNumber", "fieldNumber", "type"]))
+
+ +
[[None, 3, 'blank-label'],
+ [None, 4, 'duplicate-label'],
+ [2, 3, 'missing-cell'],
+ [2, 4, 'missing-cell'],
+ [3, 3, 'missing-cell'],
+ [3, 4, 'missing-cell'],
+ [4, None, 'blank-row'],
+ [5, 5, 'extra-cell']]
+ +
+

Now that we have all this information, we can use it to fix the errors and clean up the data.

\ No newline at end of file
diff --git a/docs/guides/describing-data.html b/docs/guides/describing-data.html
new file mode 100644
index 0000000000..81efba64a1
--- /dev/null
+++ b/docs/guides/describing-data.html
@@ -0,0 +1,4792 @@

Describing Data

+
+

This guide assumes basic familiarity with the Frictionless Framework. To learn more, please read the Introduction and Quick Start. Also, this guide is meant to be read in order from top to bottom, and reuses examples throughout the text. You can use the menu to skip sections, but please note that you might need to run code from earlier sections to make all the examples work.

+
+

In Frictionless terms, "Describing data" means creating metadata for your data files. Having metadata is important because data files by themselves usually do not provide enough information to fully understand the data. For example, if you have a data table in a CSV format without metadata, you are missing a few critical pieces of information:

+ +

For a dataset, there is even more information that can be provided, like the general purpose of a dataset, information about data sources, list of authors, and more. Also, when there are many tabular files, relational rules can be very important. Usually, there are foreign keys ensuring the integrity of the dataset; for example, think of a reference table containing country names and other data tables using it as a reference. Data in this form is called "normalized data" and it occurs very often in scientific and other kinds of research.

+

Now that we have a general understanding of what "describing data" is, we can discuss why it is important:

+ +

These are not the only positives of having metadata, but they are two of the most important. Please continue reading to learn how Frictionless helps to achieve these advantages by describing your data. This guide will discuss the main describe functions (describe, Schema.describe, Resource.describe, Package.describe) and will then go into more detail about how to create and edit metadata in Frictionless.

+

For the following examples, you will need to have Frictionless installed. See our Quick Start Guide if you need help.

+ +
+
+
pip install frictionless
+
+ +
+

Describe Functions

+

The describe functions are the main Frictionless tool for describing data. In many cases, this high-level interface is enough for data exploration and other needs.

+

The frictionless framework provides 4 different describe functions in Python:
  • describe: detects the source type and returns the corresponding metadata
  • Schema.describe: describes a Table Schema
  • Resource.describe: describes a Data Resource
  • Package.describe: describes a Data Package
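A minimal sketch of how these calls relate, assuming a local table.csv like the one used below (each call returns the corresponding metadata class):

from frictionless import Package, Resource, Schema, describe
+
+resource = describe('table.csv')           # Resource metadata (default)
+schema = Schema.describe('table.csv')      # Schema metadata
+resource = Resource.describe('table.csv')  # Resource metadata
+package = Package.describe('table.csv')    # Package metadata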

+ +

As described in more detail in the Introduction, a resource is a single file, such as a data file, and a package is a set of files, such as a data file and a schema.

+

In the command-line, there is only 1 command (describe) but there is also a flag to adjust the behavior:

+ +
+
+
frictionless describe your-table.csv
+frictionless describe your-table.csv --type schema
+frictionless describe your-table.csv --type resource
+frictionless describe your-table.csv --type package
+
+ +
+

Please take into account that file names might be used by Frictionless to detect a metadata type for data extraction or validation. It's recommended to use corresponding suffixes when you save your metadata to the disk. For example, you might name your Table Schema as table.schema.yaml, Data Resource as table.resource.yaml, and Data Package as table.package.yaml. If there is no hint in the file name Frictionless will assume that it's a resource descriptor by default.
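For example, a sketch of saving each kind of descriptor with its recommended suffix, assuming describe accepts the same type values in Python as the --type flag does on the command line:

from frictionless import describe
+
+describe('table.csv', type='schema').to_yaml('table.schema.yaml')
+describe('table.csv', type='resource').to_yaml('table.resource.yaml')
+describe('table.csv', type='package').to_yaml('table.package.yaml')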

+

For example, if we want a Data Package descriptor for a single file:

+
+

Download table.csv to reproduce the examples (right-click and "Save link as").

+
+ +
+
+
frictionless describe table.csv --type package
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+           dataset
+┏━━━━━━━┳━━━━━━━┳━━━━━━━━━━━┓
+┃ name  ┃ type  ┃ path      ┃
+┡━━━━━━━╇━━━━━━━╇━━━━━━━━━━━┩
+│ table │ table │ table.csv │
+└───────┴───────┴───────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+       table
+┏━━━━━━━━━┳━━━━━━━━┓
+┃ id      ┃ name   ┃
+┡━━━━━━━━━╇━━━━━━━━┩
+│ integer │ string │
+└─────────┴────────┘
+ +
+
+
from frictionless import describe
+
+package = describe("table.csv", type="package")
+print(package.to_yaml())
+
+ +
resources:
+  - name: table
+    type: table
+    path: table.csv
+    scheme: file
+    format: csv
+    mediatype: text/csv
+    encoding: utf-8
+    schema:
+      fields:
+        - name: id
+          type: integer
+        - name: name
+          type: string
+ +
+

Describing a Schema

+

Table Schema is a specification for providing a "schema" (similar to a database schema) for tabular data. This information includes the expected data type for each value in a column ("string", "number", "date", etc.), constraints on the value ("this string can only be at most 10 characters long"), and the expected format of the data ("this field should only contain strings that look like email addresses"). Table Schema can also specify relations between data tables.

+

We're going to use this file for the examples in this section. For this guide, we only use CSV files because they are easy to read and demonstrate with, but in general Frictionless can handle data in Excel, JSON, SQL, and many other formats:

+
+

Download country-1.csv to reproduce the examples (right-click and "Save link as").

+
+ +
+
+
cat country-1.csv
+
+ +
id,neighbor_id,name,population
+1,,Britain,67
+2,3,France,67
+3,2,Germany,83
+4,5,Italy,60
+5,4,Spain,47
+ +
+
+
with open('country-1.csv') as file:
+    print(file.read())
+
+ +
id,neighbor_id,name,population
+1,,Britain,67
+2,3,France,67
+3,2,Germany,83
+4,5,Italy,60
+5,4,Spain,47
+ +
+

Let's get a Table Schema using the Frictionless framework (note: this example uses YAML for the schema format, but Frictionless also supports JSON format):

+ +
+
+
from frictionless import Schema
+
+schema = Schema.describe("country-1.csv")
+schema.to_yaml("country.schema.yaml") # use schema.to_json for JSON
+
+ +
+

The high-level functions of Frictionless operate on the dataset and resource levels so we have to use a little bit of Python programming to get the schema information. Below we will show how to use a command-line interface for similar tasks.

+ +
+
+
cat country.schema.yaml
+
+ +
fields:
+  - name: id
+    type: integer
+  - name: neighbor_id
+    type: integer
+  - name: name
+    type: string
+  - name: population
+    type: integer
+ +
+
+
with open('country.schema.yaml') as file:
+    print(file.read())
+
+ +
fields:
+  - name: id
+    type: integer
+  - name: neighbor_id
+    type: integer
+  - name: name
+    type: string
+  - name: population
+    type: integer
+ +
+

As we can see, we were able to infer basic metadata from our data file. But describing data doesn't end here - we can provide additional information that we discussed earlier:

+
+

You can edit "country.schema.yaml" manually instead of running Python

+
+ +
+
+
from frictionless import Schema
+
+schema = Schema.describe("country-1.csv")
+schema.get_field("id").title = "Identifier"
+schema.get_field("neighbor_id").title = "Identifier of the neighbor"
+schema.get_field("name").title = "Name of the country"
+schema.get_field("population").title = "Population"
+schema.get_field("population").description = "According to the year 2020's data"
+schema.get_field("population").constraints["minimum"] = 0
+schema.foreign_keys.append(
+    {"fields": ["neighbor_id"], "reference": {"resource": "", "fields": ["id"]}}
+)
+schema.to_yaml("country.schema-full.yaml")
+
+ +
+

Let's break it down:
  • we added a title for all the fields
  • we added a description to the "population" field; the year information can be critical for interpreting the data
  • we set a constraint on the "population" field because it can't be less than 0
  • we added a foreign key saying that "neighbor_id" should be present in the "id" field of the same resource

+ + +
+
+
cat country.schema-full.yaml
+
+ +
fields:
+  - name: id
+    type: integer
+    title: Identifier
+  - name: neighbor_id
+    type: integer
+    title: Identifier of the neighbor
+  - name: name
+    type: string
+    title: Name of the country
+  - name: population
+    type: integer
+    title: Population
+    description: According to the year 2020's data
+    constraints:
+      minimum: 0
+foreignKeys:
+  - fields:
+      - neighbor_id
+    reference:
+      resource: ''
+      fields:
+        - id
+ +
+
+
with open('country.schema-full.yaml') as file:
+    print(file.read())
+
+ +
fields:
+  - name: id
+    type: integer
+    title: Identifier
+  - name: neighbor_id
+    type: integer
+    title: Identifier of the neighbor
+  - name: name
+    type: string
+    title: Name of the country
+  - name: population
+    type: integer
+    title: Population
+    description: According to the year 2020's data
+    constraints:
+      minimum: 0
+foreignKeys:
+  - fields:
+      - neighbor_id
+    reference:
+      resource: ''
+      fields:
+        - id
+ +
+

Later we're going to show how to use the schema we created to ensure the validity of your data; in the next few sections, we will focus on Data Resource and Data Package metadata.

+

To continue learning about table schemas please read:

+ +

Describing a Resource

+

The Data Resource format describes a data resource such as an individual file or data table. The essence of a Data Resource is a path to the data file it describes. A range of other properties can be declared to provide a richer set of metadata, including Table Schema for tabular data.

+

For this section, we will use a file that is slightly more complex to handle. In this example, cells are separated by the ";" character and there is a comment on the top:

+
+

Download country-2.csv to reproduce the examples (right-click and "Save link as").

+
+ +
+
+
cat country-2.csv
+
+ +
# Author: the scientist
+id;neighbor_id;name;population
+1;;Britain;67
+2;3;France;67
+3;2;Germany;83
+4;5;Italy;60
+5;4;Spain;47
+ +
+
+
with open('country-2.csv') as file:
+    print(file.read())
+
+ +
# Author: the scientist
+id;neighbor_id;name;population
+1;;Britain;67
+2;3;France;67
+3;2;Germany;83
+4;5;Italy;60
+5;4;Spain;47
+ +
+

Let's describe it:

+ +
+
+
frictionless describe country-2.csv
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+               dataset
+┏━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━┓
+┃ name      ┃ type  ┃ path          ┃
+┡━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━┩
+│ country-2 │ table │ country-2.csv │
+└───────────┴───────┴───────────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+         country-2
+┏━━━━━━━━━━━━━━━━━━━━━━━━━┓
+┃ # Author: the scientist ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━━┩
+│ string                  │
+└─────────────────────────┘
+ +
+
+
from frictionless import describe
+
+resource = describe('country-2.csv')
+print(resource.to_yaml())
+
+ +
name: country-2
+type: table
+path: country-2.csv
+scheme: file
+format: csv
+mediatype: text/csv
+encoding: utf-8
+schema:
+  fields:
+    - name: '# Author: the scientist'
+      type: string
+ +
+

OK, that looks wrong -- for example, the schema has only inferred one field, and that field does not seem correct either. As we have seen in the "Introductory Guide", Frictionless is capable of inferring metadata for some complicated cases, but our data table is too complex for it to infer automatically. We need to adjust the metadata manually:

+
+

You can edit "country.resource-cleaned.yaml" manually instead of running Python

+
+ +
+
+
from frictionless import Schema, describe
+
+resource = describe("country-2.csv")
+resource.dialect.header_rows = [2]
+resource.dialect.get_control('csv').delimiter = ";"
+resource.schema = "country.schema.yaml"
+resource.to_yaml("country.resource-cleaned.yaml")
+
+ +
+

So what we did here:
  • we set the header rows to be row number 2, as the first row is a comment rather than a header
  • we set the CSV delimiter to be ";"
  • we reused the schema we created earlier by pointing to its file

+ + +
+
+
cat country.resource-cleaned.yaml
+
+ +
name: country-2
+type: table
+path: country-2.csv
+scheme: file
+format: csv
+mediatype: text/csv
+encoding: utf-8
+dialect:
+  headerRows:
+    - 2
+  csv:
+    delimiter: ;
+schema: country.schema.yaml
+ +
+
+
with open('country.resource-cleaned.yaml') as file:
+    print(file.read())
+
+ +
name: country-2
+type: table
+path: country-2.csv
+scheme: file
+format: csv
+mediatype: text/csv
+encoding: utf-8
+dialect:
+  headerRows:
+    - 2
+  csv:
+    delimiter: ;
+schema: country.schema.yaml
+ +
+

Our resource metadata includes the schema metadata we created earlier, but it also has general information about the file itself, such as its name, path, scheme, format, media type, and encoding, along with a dialect describing how to read it.

+ +

But the most important difference is that the resource metadata contains the path property. This is a conceptual distinction of the Data Resource specification compared to the Table Schema specification. While a Table Schema descriptor can describe a class of data files, a Data Resource descriptor describes only one exact data file, country-2.csv in our case.

+

Using programming terminology we could say that a Table Schema descriptor is like a class, while a Data Resource descriptor is like an instance of that class.

+ +

We will show the practical difference in the "Metadata Importance" section, but in the next section, we will overview the Data Package specification.

+

To continue learning about data resources please read:

+ +

Describing a Package

+

A Data Package consists of two parts: metadata that describes the structure and contents of the package, and resources such as data files that form the contents of the package.

+ +

The Data Package metadata is stored in a "descriptor". This descriptor is what makes a collection of data a Data Package. The structure of this descriptor is the main content of the specification below.

+

In addition to this descriptor, a data package will include other resources such as data files. The Data Package specification does NOT impose any requirements on their form or structure and can, therefore, be used for packaging any kind of data.

+

The data included in the package may be provided as data inlined directly in the descriptor, as local files bundled with the descriptor, or as remote resources referenced by URL.

+ +

For this section, we will use the following files:

+
+

Download country-3.csv to reproduce the examples (right-click and "Save link as")

+
+ +
+
+
cat country-3.csv
+
+ +
id,capital_id,name,population
+1,1,Britain,67
+2,3,France,67
+3,2,Germany,83
+4,5,Italy,60
+5,4,Spain,47
+ +
+
+
with open('country-3.csv') as file:
+    print(file.read())
+
+ +
id,capital_id,name,population
+1,1,Britain,67
+2,3,France,67
+3,2,Germany,83
+4,5,Italy,60
+5,4,Spain,47
+ +
+
+

Download capital-3.csv to reproduce the examples (right-click and "Save link as").

+
+ +
+
+
cat capital-3.csv
+
+ +
id,name
+1,London
+2,Berlin
+3,Paris
+4,Madrid
+5,Rome
+ +
+
+
with open('capital-3.csv') as file:
+    print(file.read())
+
+ +
id,name
+1,London
+2,Berlin
+3,Paris
+4,Madrid
+5,Rome
+ +
+

First of all, let's describe our package now. We did it before for a resource but now we're going to use a glob pattern to indicate that there are multiple files:

+ +
+
+
frictionless describe *-3.csv
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+               dataset
+┏━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━┓
+┃ name      ┃ type  ┃ path          ┃
+┡━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━┩
+│ capital-3 │ table │ capital-3.csv │
+│ country-3 │ table │ country-3.csv │
+└───────────┴───────┴───────────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+     capital-3
+┏━━━━━━━━━┳━━━━━━━━┓
+┃ id      ┃ name   ┃
+┡━━━━━━━━━╇━━━━━━━━┩
+│ integer │ string │
+└─────────┴────────┘
+                  country-3
+┏━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━┓
+┃ id      ┃ capital_id ┃ name   ┃ population ┃
+┡━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━┩
+│ integer │ integer    │ string │ integer    │
+└─────────┴────────────┴────────┴────────────┘
+ +
+
+
from frictionless import describe
+
+package = describe("*-3.csv")
+print(package.to_yaml())
+
+ +
resources:
+  - name: capital-3
+    type: table
+    path: capital-3.csv
+    scheme: file
+    format: csv
+    mediatype: text/csv
+    encoding: utf-8
+    schema:
+      fields:
+        - name: id
+          type: integer
+        - name: name
+          type: string
+  - name: country-3
+    type: table
+    path: country-3.csv
+    scheme: file
+    format: csv
+    mediatype: text/csv
+    encoding: utf-8
+    schema:
+      fields:
+        - name: id
+          type: integer
+        - name: capital_id
+          type: integer
+        - name: name
+          type: string
+        - name: population
+          type: integer
+ +
+

We have already learned about many concepts that are reflected in this metadata. We can see resources, schemas, fields, and other familiar entities. The difference is that this descriptor has information about multiple files, which is a popular way of sharing data: as datasets. Very often you have not only one data file but also additional data files and some textual documents, e.g. PDFs. To package all of these files with the corresponding metadata we use data packages.

+

Following the pattern that is already familiar to the guide reader, we add some additional metadata:

+
+

You can edit "country.package.yaml" manually instead of running Python

+
+ +
+
+
from frictionless import describe
+
+package = describe("*-3.csv")
+package.title = "Countries and their capitals"
+package.description = "The data was collected as a research project"
+package.get_resource("country-3").name = "country"
+package.get_resource("capital-3").name = "capital"
+package.get_resource("country").schema.foreign_keys.append(
+    {"fields": ["capital_id"], "reference": {"resource": "capital", "fields": ["id"]}}
+)
+package.to_yaml("country.package.yaml")
+
+ +
+

In this case, we add a relation between different files connecting id and capital_id. Also, we provide dataset-level metadata to explain the purpose of this dataset. We haven't added individual fields' titles and descriptions, but that can be done as it was shown in the "Table Schema" section.

+ +
+
+
cat country.package.yaml
+
+ +
title: Countries and their capitals
+description: The data was collected as a research project
+resources:
+  - name: capital
+    type: table
+    path: capital-3.csv
+    scheme: file
+    format: csv
+    mediatype: text/csv
+    encoding: utf-8
+    schema:
+      fields:
+        - name: id
+          type: integer
+        - name: name
+          type: string
+  - name: country
+    type: table
+    path: country-3.csv
+    scheme: file
+    format: csv
+    mediatype: text/csv
+    encoding: utf-8
+    schema:
+      fields:
+        - name: id
+          type: integer
+        - name: capital_id
+          type: integer
+        - name: name
+          type: string
+        - name: population
+          type: integer
+      foreignKeys:
+        - fields:
+            - capital_id
+          reference:
+            resource: capital
+            fields:
+              - id
+ +
+
+
with open('country.package.yaml') as file:
+    print(file.read())
+
+ +
title: Countries and their capitals
+description: The data was collected as a research project
+resources:
+  - name: capital
+    type: table
+    path: capital-3.csv
+    scheme: file
+    format: csv
+    mediatype: text/csv
+    encoding: utf-8
+    schema:
+      fields:
+        - name: id
+          type: integer
+        - name: name
+          type: string
+  - name: country
+    type: table
+    path: country-3.csv
+    scheme: file
+    format: csv
+    mediatype: text/csv
+    encoding: utf-8
+    schema:
+      fields:
+        - name: id
+          type: integer
+        - name: capital_id
+          type: integer
+        - name: name
+          type: string
+        - name: population
+          type: integer
+      foreignKeys:
+        - fields:
+            - capital_id
+          reference:
+            resource: capital
+            fields:
+              - id
+ +
+

The main role of the Data Package descriptor is describing a dataset; as we can see, it includes previously shown descriptors like schema, dialect, and resource. But it would be a mistake to think that Data Package is the least important specification; actually, it completes the Frictionless Data suite making it possible to share and validate not only individual files but also complete datasets.
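For example, since our package now declares a foreign key, validating the whole package will also check referential integrity between the two files. A minimal sketch:

from frictionless import validate
+
+report = validate('country.package.yaml')
+print(report.valid)  # True if all capital_id values exist in the capital table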

+

To continue learning about data packages please read:

+ +

Metadata Importance

+

This documentation contains a great deal of information on how to use metadata and why it's vital for your data. In this section, we're going to provide a quick example based on the "Data Resource" section but please read other documents to get the full picture.

+

Let's get back to this complex data table:

+ +
+
+
cat country-2.csv
+
+ +
# Author: the scientist
+id;neighbor_id;name;population
+1;;Britain;67
+2;3;France;67
+3;2;Germany;83
+4;5;Italy;60
+5;4;Spain;47
+ +
+
+
with open('country-2.csv') as file:
+    print(file.read())
+
+ +
# Author: the scientist
+id;neighbor_id;name;population
+1;;Britain;67
+2;3;France;67
+3;2;Germany;83
+4;5;Italy;60
+5;4;Spain;47
+ +
+

As we tried before, by default Frictionless can't properly describe this file so we got something like:

+ +
+
+
frictionless describe country-2.csv
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+               dataset
+┏━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━┓
+┃ name      ┃ type  ┃ path          ┃
+┡━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━┩
+│ country-2 │ table │ country-2.csv │
+└───────────┴───────┴───────────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+         country-2
+┏━━━━━━━━━━━━━━━━━━━━━━━━━┓
+┃ # Author: the scientist ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━━┩
+│ string                  │
+└─────────────────────────┘
+ +
+
+
from frictionless import describe
+
+resource = describe("country-2.csv")
+print(resource.to_yaml())
+
+ +
name: country-2
+type: table
+path: country-2.csv
+scheme: file
+format: csv
+mediatype: text/csv
+encoding: utf-8
+schema:
+  fields:
+    - name: '# Author: the scientist'
+      type: string
+ +
+

Trying to extract the data without metadata gives us misinterpreted rows:

+ +
+
+
frictionless extract country-2.csv
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+               dataset
+┏━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━┓
+┃ name      ┃ type  ┃ path          ┃
+┡━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━┩
+│ country-2 │ table │ country-2.csv │
+└───────────┴───────┴───────────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+            country-2
+┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
+┃ # Author: the scientist        ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
+│ id;neighbor_id;name;population │
+│ 1;;Britain;67                  │
+│ 2;3;France;67                  │
+│ 3;2;Germany;83                 │
+│ 4;5;Italy;60                   │
+│ 5;4;Spain;47                   │
+└────────────────────────────────┘
+ +
+
+
from pprint import pprint
+from frictionless import extract
+
+rows = extract("country-2.csv")
+pprint(rows)
+
+ +
{'country-2': [{'# Author: the scientist': 'id;neighbor_id;name;population'},
+               {'# Author: the scientist': '1;;Britain;67'},
+               {'# Author: the scientist': '2;3;France;67'},
+               {'# Author: the scientist': '3;2;Germany;83'},
+               {'# Author: the scientist': '4;5;Italy;60'},
+               {'# Author: the scientist': '5;4;Spain;47'}]}
+ +
+

This example highlights a really important idea - without metadata many software tools will not be able to even read this data file. Furthermore, without metadata people cannot understand the purpose of this data. To see how we can use metadata to fix our data, let's now use the country.resource-cleaned.yaml file we created in the "Describing a Resource" section with Frictionless extract:

+ +
+
+
frictionless extract country.resource-cleaned.yaml
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+               dataset
+┏━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━┓
+┃ name      ┃ type  ┃ path          ┃
+┡━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━┩
+│ country-2 │ table │ country-2.csv │
+└───────────┴───────┴───────────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+                 country-2
+┏━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━┓
+┃ id ┃ neighbor_id ┃ name    ┃ population ┃
+┡━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━┩
+│ 1  │ None        │ Britain │ 67         │
+│ 2  │ 3           │ France  │ 67         │
+│ 3  │ 2           │ Germany │ 83         │
+│ 4  │ 5           │ Italy   │ 60         │
+│ 5  │ 4           │ Spain   │ 47         │
+└────┴─────────────┴─────────┴────────────┘
+ +
+
+
from pprint import pprint
+from frictionless import extract
+
+rows = extract("country.resource-cleaned.yaml")
+pprint(rows)
+
+ +
{'country-2': [{'id': 1,
+                'name': 'Britain',
+                'neighbor_id': None,
+                'population': 67},
+               {'id': 2, 'name': 'France', 'neighbor_id': 3, 'population': 67},
+               {'id': 3, 'name': 'Germany', 'neighbor_id': 2, 'population': 83},
+               {'id': 4, 'name': 'Italy', 'neighbor_id': 5, 'population': 60},
+               {'id': 5, 'name': 'Spain', 'neighbor_id': 4, 'population': 47}]}
+ +
+

As we can see, the data is now fixed. The metadata we saved earlier has saved the day! If we explore this data in Python, we can discover that it also corrected data types: e.g., id is now a Python integer, not a string. We can now export and share this data without any worries.

+

Inferring Metadata

+
+

Many of the Frictionless Framework's classes are metadata classes, such as Schema, Resource, or Package. All the sections below are applicable to all these classes. You can read about the base Metadata class in more detail in the API Reference.

+
+

Many Frictionless functions infer metadata under the hood such as describe, extract, and many more. On a lower-level, it's possible to control this process. To see this, let's create a Resource.

+
from frictionless import Resource
+
+resource = Resource("country-1.csv")
+print(resource)
+
+ +
{'name': 'country-1',
+ 'type': 'table',
+ 'path': 'country-1.csv',
+ 'scheme': 'file',
+ 'format': 'csv',
+ 'mediatype': 'text/csv'}
+
+
+

Frictionless always tries to be as explicit as possible. We didn't provide any metadata except for path so we got the expected result. But now, we'd like to infer additional metadata:

+
+

We can ask for stats using CLI with frictionless describe country-1.csv --stats. Note that we use the stats argument for the resource.infer function.

+
+ +
+
+
frictionless describe country-1.csv --stats --json
+
+ +
{
+  "name": "country-1",
+  "type": "table",
+  "path": "country-1.csv",
+  "scheme": "file",
+  "format": "csv",
+  "mediatype": "text/csv",
+  "encoding": "utf-8",
+  "hash": "sha256:7cf6ce03c75461e1d9862b89250dbacf43e97976d1f25c056173971dfb203671",
+  "bytes": 100,
+  "fields": 4,
+  "rows": 5,
+  "schema": {
+    "fields": [
+      {
+        "name": "id",
+        "type": "integer"
+      },
+      {
+        "name": "neighbor_id",
+        "type": "integer"
+      },
+      {
+        "name": "name",
+        "type": "string"
+      },
+      {
+        "name": "population",
+        "type": "integer"
+      }
+    ]
+  }
+}
+ +
+
+
from pprint import pprint
+from frictionless import Resource
+
+resource = Resource("country-1.csv")
+resource.infer(stats=True)
+pprint(resource)
+
+ +
{'name': 'country-1',
+ 'type': 'table',
+ 'path': 'country-1.csv',
+ 'scheme': 'file',
+ 'format': 'csv',
+ 'mediatype': 'text/csv',
+ 'encoding': 'utf-8',
+ 'hash': 'sha256:7cf6ce03c75461e1d9862b89250dbacf43e97976d1f25c056173971dfb203671',
+ 'bytes': 100,
+ 'fields': 4,
+ 'rows': 5,
+ 'schema': {'fields': [{'name': 'id', 'type': 'integer'},
+                       {'name': 'neighbor_id', 'type': 'integer'},
+                       {'name': 'name', 'type': 'string'},
+                       {'name': 'population', 'type': 'integer'}]}}
+ +
+

The result is really familiar to us already. We have seen it a lot as an output of the describe function or command. Basically, that's what this high-level function does under the hood: create a resource and then infer additional metadata.

+

All the main Metadata classes have this method, with different available options but the same conceptual purpose; for example, both resource.infer and package.infer are available.
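A sketch of the package-level equivalent, assuming the *-3.csv files from the earlier sections:

from frictionless import Package
+
+package = Package('*-3.csv')
+package.infer()  # infers metadata for every resource in the package
+print(package.resource_names)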

+ +

For more advanced detection options, please read the Detector Guide

+

Validating Metadata

+

Metadata validity is an important topic, and we recommend validating your metadata before publishing. For example, let's first make it invalid:

+ +
+
+
from frictionless import Resource
+
+descriptor = {}
+descriptor['path'] = 'country-1.csv'
+descriptor['title'] = 1
+try:
+    Resource(descriptor)
+except Exception as exception:
+    print(exception.error)
+    print(exception.reasons)
+
+ +
{'type': 'resource-error',
+ 'title': 'Resource Error',
+ 'description': 'A validation cannot be processed.',
+ 'message': 'The data resource has an error: descriptor is not valid',
+ 'tags': [],
+ 'note': 'descriptor is not valid'}
+[{'type': 'resource-error',
+ 'title': 'Resource Error',
+ 'description': 'A validation cannot be processed.',
+ 'message': "The data resource has an error: 'name' is a required property",
+ 'tags': [],
+ 'note': "'name' is a required property"}, {'type': 'resource-error',
+ 'title': 'Resource Error',
+ 'description': 'A validation cannot be processed.',
+ 'message': "The data resource has an error: 1 is not of type 'string' at "
+            "property 'title'",
+ 'tags': [],
+ 'note': "1 is not of type 'string' at property 'title'"}]
+ +
+
+
+

We see the error "1 is not of type 'string' at property 'title'" because we set title to be an integer.

+

Frictionless' high-level functions like validate run all metadata checks by default.
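For example, a sketch of passing the invalid descriptor from above through the high-level function; assuming the usual behavior, the metadata errors end up in the report instead of raising an exception:

from frictionless import validate
+
+report = validate({'path': 'country-1.csv', 'title': 1})
+print(report.valid)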

+

Transforming Metadata

+

We have seen this before, but let's reiterate: it's possible to transform core metadata properties using Python's interface:

+ +
+
+
from frictionless import Resource
+
+resource = Resource("country.resource-cleaned.yaml")
+resource.title = "Countries"
+resource.description = "It's a research project"
+resource.dialect.header_rows = [2]
+resource.dialect.get_control('csv').delimiter = ";"
+resource.to_yaml("country.resource-updated.yaml")
+
+ +
+

We can add custom options using the custom property:

+ +
+
+
from frictionless import Resource
+
+resource = Resource("country.resource-updated.yaml")
+resource.custom["customKey1"] = "Value1"
+resource.custom["customKey2"] = "Value2"
+resource.to_yaml("country.resource-updated2.yaml")
+
+ +
+

Let's check it out:

+ +
+
+
cat country.resource-updated2.yaml
+
+ +
name: country-2
+type: table
+title: Countries
+description: It's a research project
+path: country-2.csv
+scheme: file
+format: csv
+mediatype: text/csv
+encoding: utf-8
+dialect:
+  headerRows:
+    - 2
+  csv:
+    delimiter: ;
+schema: country.schema.yaml
+customKey1: Value1
+customKey2: Value2
+ +
+
+
with open('country.resource-updated2.yaml') as file:
+    print(file.read())
+
+ +
name: country-2
+type: table
+title: Countries
+description: It's a research project
+path: country-2.csv
+scheme: file
+format: csv
+mediatype: text/csv
+encoding: utf-8
+dialect:
+  headerRows:
+    - 2
+  csv:
+    delimiter: ;
+schema: country.schema.yaml
+customKey1: Value1
+customKey2: Value2
+ +
+
\ No newline at end of file
diff --git a/docs/guides/extracting-data.html b/docs/guides/extracting-data.html
new file mode 100644
index 0000000000..6e1e7a39d2
--- /dev/null
+++ b/docs/guides/extracting-data.html
@@ -0,0 +1,4120 @@

Extracting Data

+
+

This guide assumes basic familiarity with the Frictionless Framework. To learn more, please read the Introduction and Quick Start.

+
+

Extracting data means reading tabular data from a source. We can use various customizations for this process, such as providing a file format or table schema, limiting the number of fields or rows, and much more. This guide will discuss the main extract functions (extract, extract_resource, extract_package) and will then go into more advanced details about the Resource Class, Package Class, Header Class, and Row Class. The output of the extract function uses the 'utf-8' encoding.

+

Let's see this with some real files:

+
+

Download country-3.csv to reproduce the examples (right-click and "Save link as").

+
+ +
+
+
cat country-3.csv
+
+ +
id,capital_id,name,population
+1,1,Britain,67
+2,3,France,67
+3,2,Germany,83
+4,5,Italy,60
+5,4,Spain,47
+ +
+
+
with open('country-3.csv') as file:
+    print(file.read())
+
+ +
id,capital_id,name,population
+1,1,Britain,67
+2,3,France,67
+3,2,Germany,83
+4,5,Italy,60
+5,4,Spain,47
+ +
+
+

Download capital-3.csv to reproduce the examples (right-click and "Save link as").

+
+ +
+
+
cat capital-3.csv
+
+ +
id,name
+1,London
+2,Berlin
+3,Paris
+4,Madrid
+5,Rome
+ +
+
+
with open('capital-3.csv') as file:
+    print(file.read())
+
+ +
id,name
+1,London
+2,Berlin
+3,Paris
+4,Madrid
+5,Rome
+ +
+

To start, we will extract data from a resource:

+ +
+
+
frictionless extract country-3.csv
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+               dataset
+┏━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━┓
+┃ name      ┃ type  ┃ path          ┃
+┡━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━┩
+│ country-3 │ table │ country-3.csv │
+└───────────┴───────┴───────────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+                country-3
+┏━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━┓
+┃ id ┃ capital_id ┃ name    ┃ population ┃
+┡━━━━╇━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━┩
+│ 1  │ 1          │ Britain │ 67         │
+│ 2  │ 3          │ France  │ 67         │
+│ 3  │ 2          │ Germany │ 83         │
+│ 4  │ 5          │ Italy   │ 60         │
+│ 5  │ 4          │ Spain   │ 47         │
+└────┴────────────┴─────────┴────────────┘
+ +
+
+
from pprint import pprint
+from frictionless import extract
+
+rows = extract('country-3.csv')
+pprint(rows)
+
+ +
{'country-3': [{'capital_id': 1, 'id': 1, 'name': 'Britain', 'population': 67},
+               {'capital_id': 3, 'id': 2, 'name': 'France', 'population': 67},
+               {'capital_id': 2, 'id': 3, 'name': 'Germany', 'population': 83},
+               {'capital_id': 5, 'id': 4, 'name': 'Italy', 'population': 60},
+               {'capital_id': 4, 'id': 5, 'name': 'Spain', 'population': 47}]}
+ +
+

Extract Functions

+

The high-level interface for extracting data provided by Frictionless is a set of extract functions: extract (which detects the source type automatically), extract_resource, and extract_package.

+ +

As described in more detail in the Introduction, a resource is a single file, such as a data file, and a package is a set of files, such as a data file and a schema.

+

The command/function would be used as follows:

+ +
+
+
frictionless extract your-table.csv
+frictionless extract your-resource.json --type resource
+frictionless extract your-package.json --type package
+
+ +
+
+
from frictionless import extract
+
+rows = extract('capital-3.csv')
+resource = extract('capital-3.csv', type="resource")
+package = extract('capital-3.csv', type="package")
+
+ +
+

The extract functions always read data, in the form of rows, into memory. The lower-level interfaces will allow you to stream data, which you can read about in the Resource Class section below.
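For a quick preview of the streaming approach, a sketch using row_stream (covered in detail in the Resource Class section):

from frictionless import Resource
+
+with Resource('country-3.csv') as resource:
+    for row in resource.row_stream:  # rows are yielded one at a time
+        print(row['name'])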

+

Extracting a Resource

+

A resource contains only one file. To extract a resource, we have three options. First, we can use the same approach as above, extracting from the data file itself:

+ +
+
+
frictionless extract capital-3.csv
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+               dataset
+┏━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━┓
+┃ name      ┃ type  ┃ path          ┃
+┡━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━┩
+│ capital-3 │ table │ capital-3.csv │
+└───────────┴───────┴───────────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+   capital-3
+┏━━━━┳━━━━━━━━┓
+┃ id ┃ name   ┃
+┡━━━━╇━━━━━━━━┩
+│ 1  │ London │
+│ 2  │ Berlin │
+│ 3  │ Paris  │
+│ 4  │ Madrid │
+│ 5  │ Rome   │
+└────┴────────┘
+ +
+
+
from pprint import pprint
+from frictionless import extract
+
+rows = extract('capital-3.csv')
+pprint(rows)
+
+ +
{'capital-3': [{'id': 1, 'name': 'London'},
+               {'id': 2, 'name': 'Berlin'},
+               {'id': 3, 'name': 'Paris'},
+               {'id': 4, 'name': 'Madrid'},
+               {'id': 5, 'name': 'Rome'}]}
+ +
+

Our second option is to extract the resource from a descriptor file by using the extract_resource function. A descriptor file is useful because it can contain different metadata and be stored on disk.

+

As an example of how to use extract_resource, let's first create a descriptor file (note: this example uses YAML for the descriptor, but Frictionless also supports JSON):

+ +
+
+
from frictionless import Resource
+
+resource = Resource('capital-3.csv')
+resource.infer()
+# as an example, in the next line we will patch the schema's missing values
+resource.schema.missing_values.append('3') # will interpret 3 as a missing value
+resource.to_yaml('capital.resource-test.yaml') # use resource.to_json for JSON format
+
+ +
+

You can also use a pre-made descriptor file.

+

Now, this descriptor file can be used to extract the resource:

+ +
+
+
frictionless extract capital.resource-test.yaml
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+               dataset
+┏━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━┓
+┃ name      ┃ type  ┃ path          ┃
+┡━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━┩
+│ capital-3 │ table │ capital-3.csv │
+└───────────┴───────┴───────────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+    capital-3
+┏━━━━━━┳━━━━━━━━┓
+┃ id   ┃ name   ┃
+┡━━━━━━╇━━━━━━━━┩
+│ 1    │ London │
+│ 2    │ Berlin │
+│ None │ Paris  │
+│ 4    │ Madrid │
+│ 5    │ Rome   │
+└──────┴────────┘
+ +
+
+
from pprint import pprint
+from frictionless import extract
+
+rows = extract('capital.resource-test.yaml')
+pprint(rows)
+
+ +
{'capital-3': [{'id': 1, 'name': 'London'},
+               {'id': 2, 'name': 'Berlin'},
+               {'id': None, 'name': 'Paris'},
+               {'id': 4, 'name': 'Madrid'},
+               {'id': 5, 'name': 'Rome'}]}
+ +
+

So what has happened in this example? We set the textual representation of the number "3" to be a missing value. In the output we can see how the id number 3 now appears as None representing a missing value. This toy example demonstrates how the metadata in a descriptor can be used; other values like "NA" are more common for missing values.

+

You can read more advanced details about the Resource Class below.

+

Extracting a Package

+

The third way we can extract information is from a package, which is a set of two or more files, for instance, two data files and a corresponding metadata file.

+

As a primary example, we provide two data files to the extract command which will be enough to detect that it's a dataset. Let's start by using the command-line interface:

+ +
+
+
frictionless extract *-3.csv
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+               dataset
+┏━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━┓
+┃ name      ┃ type  ┃ path          ┃
+┡━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━┩
+│ capital-3 │ table │ capital-3.csv │
+│ country-3 │ table │ country-3.csv │
+└───────────┴───────┴───────────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+   capital-3
+┏━━━━┳━━━━━━━━┓
+┃ id ┃ name   ┃
+┡━━━━╇━━━━━━━━┩
+│ 1  │ London │
+│ 2  │ Berlin │
+│ 3  │ Paris  │
+│ 4  │ Madrid │
+│ 5  │ Rome   │
+└────┴────────┘
+                country-3
+┏━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━┓
+┃ id ┃ capital_id ┃ name    ┃ population ┃
+┡━━━━╇━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━┩
+│ 1  │ 1          │ Britain │ 67         │
+│ 2  │ 3          │ France  │ 67         │
+│ 3  │ 2          │ Germany │ 83         │
+│ 4  │ 5          │ Italy   │ 60         │
+│ 5  │ 4          │ Spain   │ 47         │
+└────┴────────────┴─────────┴────────────┘
+ +
+
+
from pprint import pprint
+from frictionless import extract
+
+data = extract('*-3.csv')
+pprint(data)
+
+ +
{'capital-3': [{'id': 1, 'name': 'London'},
+               {'id': 2, 'name': 'Berlin'},
+               {'id': 3, 'name': 'Paris'},
+               {'id': 4, 'name': 'Madrid'},
+               {'id': 5, 'name': 'Rome'}],
+ 'country-3': [{'capital_id': 1, 'id': 1, 'name': 'Britain', 'population': 67},
+               {'capital_id': 3, 'id': 2, 'name': 'France', 'population': 67},
+               {'capital_id': 2, 'id': 3, 'name': 'Germany', 'population': 83},
+               {'capital_id': 5, 'id': 4, 'name': 'Italy', 'population': 60},
+               {'capital_id': 4, 'id': 5, 'name': 'Spain', 'population': 47}]}
+ +
+

We can also extract the package from a descriptor file using the package.extract function (Note: see the Package Class section for the creation of the country.package.yaml file):

+ +
+
+
frictionless extract country.package.yaml
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+              dataset
+┏━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━┓
+┃ name    ┃ type  ┃ path          ┃
+┡━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━┩
+│ capital │ table │ capital-3.csv │
+│ country │ table │ country-3.csv │
+└─────────┴───────┴───────────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+    capital
+┏━━━━┳━━━━━━━━┓
+┃ id ┃ name   ┃
+┡━━━━╇━━━━━━━━┩
+│ 1  │ London │
+│ 2  │ Berlin │
+│ 3  │ Paris  │
+│ 4  │ Madrid │
+│ 5  │ Rome   │
+└────┴────────┘
+                 country
+┏━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━┓
+┃ id ┃ capital_id ┃ name    ┃ population ┃
+┡━━━━╇━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━┩
+│ 1  │ 1          │ Britain │ 67         │
+│ 2  │ 3          │ France  │ 67         │
+│ 3  │ 2          │ Germany │ 83         │
+│ 4  │ 5          │ Italy   │ 60         │
+│ 5  │ 4          │ Spain   │ 47         │
+└────┴────────────┴─────────┴────────────┘
+ +
+
+
from frictionless import Package
+
+package = Package('country.package.yaml')
+pprint(package.extract())
+
+ +
{'capital': [{'id': 1, 'name': 'London'},
+             {'id': 2, 'name': 'Berlin'},
+             {'id': 3, 'name': 'Paris'},
+             {'id': 4, 'name': 'Madrid'},
+             {'id': 5, 'name': 'Rome'}],
+ 'country': [{'capital_id': 1, 'id': 1, 'name': 'Britain', 'population': 67},
+             {'capital_id': 3, 'id': 2, 'name': 'France', 'population': 67},
+             {'capital_id': 2, 'id': 3, 'name': 'Germany', 'population': 83},
+             {'capital_id': 5, 'id': 4, 'name': 'Italy', 'population': 60},
+             {'capital_id': 4, 'id': 5, 'name': 'Spain', 'population': 47}]}
+ +
+

You can read more advanced details about the Package Class below.

+
+

The following sections contain further, advanced details about the Resource Class, Package Class, Header Class, and Row Class.

+
+

Resource Class

+

The Resource class provides metadata about a resource with read and stream functions. The extract functions always read rows into memory; Resource can do the same, but it also gives a choice regarding output data, which can be rows, cells, text, or bytes. Let's try reading all of them.

+

Reading Bytes

+

It's a byte representation of the contents:

+ +
+
+
from pprint import pprint
+from frictionless import Resource
+
+resource = Resource('country-3.csv')
+pprint(resource.read_bytes())
+
+ +
(b'id,capital_id,name,population\n1,1,Britain,67\n2,3,France,67\n3,2,Germany,8'
+ b'3\n4,5,Italy,60\n5,4,Spain,47\n')
+ +
+

Reading Text

+

It's a textual representation of the contents:

+ +
+
+
from pprint import pprint
+from frictionless import Resource
+
+resource = Resource('country-3.csv')
+pprint(resource.read_text())
+
+ +
('id,capital_id,name,population\n'
+ '1,1,Britain,67\n'
+ '2,3,France,67\n'
+ '3,2,Germany,83\n'
+ '4,5,Italy,60\n'
+ '5,4,Spain,47\n')
+ +
+

Reading Cells

+

For tabular data there is a raw representation of the tabular contents:

+ +
+
+
from pprint import pprint
+from frictionless import Resource
+
+resource = Resource('country-3.csv')
+pprint(resource.read_cells())
+
+ +
[['id', 'capital_id', 'name', 'population'],
+ ['1', '1', 'Britain', '67'],
+ ['2', '3', 'France', '67'],
+ ['3', '2', 'Germany', '83'],
+ ['4', '5', 'Italy', '60'],
+ ['5', '4', 'Spain', '47']]
+ +
+

Reading Rows

+

For tabular data there are rows available, which are normalized lists presented as dictionaries:

+ +
+
+
from pprint import pprint
+from frictionless import Resource
+
+resource = Resource('country-3.csv')
+pprint(resource.read_rows())
+
+ +
[{'id': 1, 'capital_id': 1, 'name': 'Britain', 'population': 67},
+ {'id': 2, 'capital_id': 3, 'name': 'France', 'population': 67},
+ {'id': 3, 'capital_id': 2, 'name': 'Germany', 'population': 83},
+ {'id': 4, 'capital_id': 5, 'name': 'Italy', 'population': 60},
+ {'id': 5, 'capital_id': 4, 'name': 'Spain', 'population': 47}]
+ +
+

Reading a Header

+

For tabular data, the Header object is also available:

+ +
+
+
from frictionless import Resource
+
+with Resource('country-3.csv') as resource:
+    pprint(resource.header)
+
+ +
['id', 'capital_id', 'name', 'population']
+ +
+
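
The Header behaves like a list of labels but also carries validation state. Here is a minimal sketch of inspecting it, assuming the labels and valid attributes exposed by the Header class:

from frictionless import Resource

with Resource('country-3.csv') as resource:
    header = resource.header
    print(header.labels)  # assumption: the raw labels read from the file
    print(header.valid)   # assumption: True when the header has no errors
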

Streaming Interfaces

+

It's really handy to read all your data into memory, but that's not always possible when a file is very big. For such cases, Frictionless provides streaming functions:

+ +
+
+
from frictionless import Resource
+
+with Resource('country-3.csv') as resource:
+    resource.byte_stream
+    resource.text_stream
+    resource.list_stream
+    resource.row_stream
+
+ +
+
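
For example, here is a sketch of iterating row_stream so that only one row is held in memory at a time (using the same country-3.csv file):

from frictionless import Resource

with Resource('country-3.csv') as resource:
    # Rows are produced lazily; the whole file is never loaded into memory
    for row in resource.row_stream:
        print(row['name'], row['population'])
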

Package Class

+

The Package class provides functions to read the contents of a package. First of all, let's create a package descriptor:

+ +
+
+
frictionless describe *-3.csv --json > country.package.json
+
+ +
+
+
from frictionless import describe
+
+package = describe('*-3.csv')
+package.to_json('country.package.json')
+
+ +
+

Note that --json is used here to output the descriptor in JSON format. Without this, the default output is in YAML format as we saw above.

+

We can create a package from data files (using their paths) and then read the package's resources:

+ +
+
+
from frictionless import Package
+
+package = Package('*-3.csv')
+pprint(package.get_resource('country-3').read_rows())
+pprint(package.get_resource('capital-3').read_rows())
+
+ +
[{'id': 1, 'capital_id': 1, 'name': 'Britain', 'population': 67},
+ {'id': 2, 'capital_id': 3, 'name': 'France', 'population': 67},
+ {'id': 3, 'capital_id': 2, 'name': 'Germany', 'population': 83},
+ {'id': 4, 'capital_id': 5, 'name': 'Italy', 'population': 60},
+ {'id': 5, 'capital_id': 4, 'name': 'Spain', 'population': 47}]
+[{'id': 1, 'name': 'London'},
+ {'id': 2, 'name': 'Berlin'},
+ {'id': 3, 'name': 'Paris'},
+ {'id': 4, 'name': 'Madrid'},
+ {'id': 5, 'name': 'Rome'}]
+ +
+

The package by itself doesn't provide any read functions directly because it's just a container. You can select a package's resource and use the Resource API from above for data reading.
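
For example, here is a sketch that selects a resource from the country.package.json descriptor created above and reuses a Resource reading function on it:

from frictionless import Package

package = Package('country.package.json')

# Pick one resource and use the Resource API on it
resource = package.get_resource('country-3')
print(resource.read_text())
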

\ No newline at end of file
diff --git a/docs/guides/transforming-data.html b/docs/guides/transforming-data.html
new file mode 100644
index 0000000000..03cd42cc4d
--- /dev/null
+++ b/docs/guides/transforming-data.html
@@ -0,0 +1,3751 @@
+Transforming Data | Frictionless Framework

Transforming Data

+
+

This guide assumes basic familiarity with the Frictionless Framework. To learn more, please read the Introduction and Quick Start.

+
+

Transforming data in Frictionless means modifying data and metadata from state A to state B. For example, it could be transforming a messy Excel file to a cleaned CSV file, or transforming a folder of data files to a data package we can publish more easily. To read more about the concepts behind Frictionless Transform, please check out the Transform Principles section below.

+

In comparison to similar Python software like Pandas, Frictionless provides better control over metadata, has a modular API, and fully supports the Frictionless Specifications. Also, it is a streaming framework with the ability to work with large data. As a downside of this architecture, it might be slower compared to other Python packages, especially to projects like Pandas.

+

Keep reading below to learn about the principles underlying Frictionless Transform, or skip ahead to see how to use the Transform code.

+

Transform Principles

+

Frictionless Transform is based on a few core principles which are shared with other parts of the framework:

+

Conceptual Simplicity

+

Frictionless Transform can be thought of as a list of functions that accept a source resource/package object and return a target resource/package object. Every function updates the input's metadata and data - and nothing more. We tried to make this straightforward and conceptually simple, because we want our users to be able to understand the tools and master them.
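
In pseudo-code terms, the idea looks like this (a conceptual sketch, not actual framework code):

from frictionless import Resource

# Every transform is conceptually a function: target = step(source)
def my_step(source: Resource) -> Resource:
    target = source.to_copy()
    # ...update the target's metadata and data here...
    return target
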

+

Metadata Matters

+

There are plenty of great ETL frameworks written in Python and other languages, and we use one of them (PETL) under the hood (described in more detail later). The core difference is that Frictionless treats metadata as a first-class citizen. This means that you don't lose types and other important information during pipeline evaluation.

+

Data Streaming

+

Whenever possible, Frictionless streams the data instead of reading it into memory. For example, for sorting big tables we use a memory usage threshold, and when it is exceeded we offload the data to the file system. The ability to stream data gives users the power to work with files of any size, even very large ones.

+

Lazy Evaluation

+

With Frictionless all data manipulation happens on-demand. For example, if you reshape one table in a data package containing ten big CSV files, Frictionless will not even read the nine other tables. Frictionless tries to be as explicit as possible regarding actions taken. For example, it will not use CPU resources to cast data unless a user adds a normalize step. So it's possible to transform a rather big file without even casting types, for example, if you only need to reshape it.
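
A quick illustration of this laziness (a sketch using the transform.csv file introduced below): creating the pipeline and calling transform does not read any data by itself.

from frictionless import Pipeline, Resource, steps

source = Resource('transform.csv')
pipeline = Pipeline(steps=[steps.table_normalize()])

# No rows have been read at this point; transform only wires up the steps
target = source.transform(pipeline)

# Reading the target is what actually pulls rows through the pipeline
print(target.read_rows())
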

+

Software Reuse

+

For the core transform functions, Frictionless uses the amazing PETL project under the hood. This library provides lazy-loading functionality for running data pipelines. On top of PETL, Frictionless adds metadata management and a bridge between Frictionless concepts like Package/Resource and PETL's processors.

+

Transform Functions

+

Frictionless supports a few different kinds of data and metadata transformations:

+ +

The main difference between these is that resource and package transforms are imperative while pipelines can be created beforehand or shared as a JSON file. We'll talk more about pipelines in the Transforming Pipeline section below. First, we will introduce the transform functions, then go into detail about how to transform a resource and a package. As a reminder, in the Frictionless ecosystem, a resource is a single file, such as a data file, and a package is a set of files, such as a data file and a schema. This concept is described in more detail in the Introduction.

+
+

Download transform.csv to reproduce the examples (right-click and "Save link as"; you might need to change the file extension from .txt to .csv).

+
+ +
+
+
cat transform.csv
+
+ +
id,name,population
+1,germany,83
+2,france,66
+3,spain,47
+ +
+

The high-level interface to transform data is a set of transform functions:

+ +

We'll see examples of these functions in the next few sections.

+

Transforming a Resource

+

Let's write our first transformation. Here, we will transform a data file (a resource) by defining a source resource, applying transform steps and getting back a resulting target resource:

+ +
+
+
from frictionless import Resource, Pipeline, steps
+
+# Define source resource
+source = Resource(path="transform.csv")
+
+# Create a pipeline
+pipeline = Pipeline(steps=[
+    steps.table_normalize(),
+    steps.field_add(name="cars", formula='population*2', descriptor={'type': 'integer'}),
+])
+
+# Apply transform pipeline
+target = source.transform(pipeline)
+
+# Print resulting schema and data
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'},
+            {'name': 'cars', 'type': 'integer'}]}
++----+-----------+------------+------+
+| id | name      | population | cars |
++====+===========+============+======+
+|  1 | 'germany' |         83 |  166 |
++----+-----------+------------+------+
+|  2 | 'france'  |         66 |  132 |
++----+-----------+------------+------+
+|  3 | 'spain'   |         47 |   94 |
++----+-----------+------------+------+
+ +
+

Let's break down the transforming steps we applied:

+
  1. steps.table_normalize - casts data types and shapes the table according to the schema, inferred or provided
  2. steps.field_add - adds a field to data and metadata based on the information provided by the user
+

There are many more available steps that we will cover below.

+

Transforming a Package

+

A package is a set of resources. Transforming a package means adding or removing resources and/or transforming those resources themselves. This example shows how transforming a package is similar to transforming a single resource:

+ +
+
+
from frictionless import Package, Pipeline, Resource, steps
+
+# Define source package
+source = Package(resources=[Resource(name='main', path="transform.csv")])
+
+# Create a pipeline
+pipeline = Pipeline(steps=[
+    steps.resource_add(name="extra", descriptor={"data": [['id', 'cars'], [1, 166], [2, 132], [3, 94]]}),
+    steps.resource_transform(
+        name="main",
+        steps=[
+            steps.table_normalize(),
+            steps.table_join(resource="extra", field_name="id"),
+        ],
+    ),
+    steps.resource_remove(name="extra"),
+])
+
+# Apply transform steps
+target = source.transform(pipeline)
+
+# Print resulting resources, schema and data
+print(target.resource_names)
+print(target.get_resource("main").schema)
+print(target.get_resource("main").to_view())
+
+ +
['main']
+{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'},
+            {'name': 'cars', 'type': 'integer'}]}
++----+-----------+------------+------+
+| id | name      | population | cars |
++====+===========+============+======+
+|  1 | 'germany' |         83 |  166 |
++----+-----------+------------+------+
+|  2 | 'france'  |         66 |  132 |
++----+-----------+------------+------+
+|  3 | 'spain'   |         47 |   94 |
++----+-----------+------------+------+
+ +
+

We have basically done the same as in the Transforming a Resource section. This example is quite artificial, created only to show how to join two resources, but hopefully it gives a basic understanding of how flexible package transformations can be.

+

Transforming Pipeline

+

A pipeline is a declarative way to write out metadata transform steps. With a pipeline, you can transform a resource or a package, and custom plugin steps are supported too.

+

For resource and package types it's mostly the same functionality as we have seen above, but written declaratively. So let's run the same resource transformation as we did in the Transforming a Resource section:

+ +
+
+
from frictionless import Pipeline
+
+pipeline = Pipeline.from_descriptor({
+    "steps": [
+        {"type": "table-normalize"},
+        {
+            "type": "field-add",
+            "name": "cars",
+            "formula": "population*2",
+            "descriptor": {"type": "integer"}
+        },
+    ],
+})
+print(pipeline)
+
+ +
{'steps': [{'type': 'table-normalize'},
+           {'name': 'cars',
+            'type': 'field-add',
+            'formula': 'population*2',
+            'descriptor': {'type': 'integer'}}]}
+ +
+

So what's the reason to use declarative pipelines if they work the same as Python code? The main difference is that pipelines can be saved as JSON files which can be shared among different users and used with the CLI and API. For example, if you implement your own UI based on Frictionless Framework, you can serialize the whole pipeline as a JSON file and send it to the server. The same applies to the CLI: if a colleague has given you a pipeline.json file, you can run frictionless transform pipeline.json to get the same results as they got.
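
For example, here is a sketch of saving a pipeline for sharing (assuming Pipeline supports the to_json helper like the other metadata classes shown in this guide):

from frictionless import Pipeline

pipeline = Pipeline.from_descriptor({
    "steps": [
        {"type": "table-normalize"},
    ],
})

# Save it so a colleague can run: frictionless transform pipeline.json
pipeline.to_json('pipeline.json')
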

+

Available Steps

+

Frictionless includes more than 40 built-in transform steps. They are grouped by the object they operate on, so you can find them easily using code auto-completion in a code editor. For example, start typing steps.table... and you will see all the available steps for that group. The available groups are:

+ +

See Transform Steps for a list of all available steps. It is also possible to write custom transform steps: see the next section.

+

Custom Steps

+

Here is an example of a custom step implemented in Python. This example step removes a field from a data table (note: Frictionless already has a built-in step that does the same thing: steps.field_remove).

+ +
+
+
from frictionless import Pipeline, Resource, Step
+
+class custom_step(Step):
+    def transform_resource(self, resource):
+        current = resource.to_copy()
+
+        # Data
+        def data():
+            with current:
+                for cells in current.cell_stream:
+                    yield cells[1:]
+
+        # Meta
+        resource.data = data
+        resource.schema.remove_field("id")
+
+source = Resource("transform.csv")
+pipeline = Pipeline(steps=[custom_step()])
+target = source.transform(pipeline)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'}]}
++-----------+------------+
+| name      | population |
++===========+============+
+| 'germany' |         83 |
++-----------+------------+
+| 'france'  |         66 |
++-----------+------------+
+| 'spain'   |         47 |
++-----------+------------+
+ +
+

As you can see, you can implement any custom step within a Python script. To make it work within a declarative pipeline you need to implement a plugin. Learn more about Custom Steps and Plugins.

+

Transform Utils

+
+

Transform Utils is under construction.

+
+

Working with PETL

+

In some cases, it's better to use a lower-level API to achieve your goal. A resource can be exported as a PETL table. For more information please visit PETL's documentation portal.

+ + + +
+
+
from frictionless import Resource
+
+resource = Resource(path='transform.csv')
+petl_table = resource.to_petl()
+# Use it with PETL framework
+print(petl_table)
+
+ +
+----+---------+------------+
+| id | name    | population |
++====+=========+============+
+| 1  | germany | 83         |
++----+---------+------------+
+| 2  | france  | 66         |
++----+---------+------------+
+| 3  | spain   | 47         |
++----+---------+------------+
+ +
+
\ No newline at end of file
diff --git a/docs/guides/validating-data.html b/docs/guides/validating-data.html
new file mode 100644
index 0000000000..429d354bde
--- /dev/null
+++ b/docs/guides/validating-data.html
@@ -0,0 +1,4798 @@
+Validating Data | Frictionless Framework

Validating Data

+
+

This guide assumes basic familiarity with the Frictionless Framework. To learn more, please read the Introduction and Quick Start.

+
+

Tabular data validation is a process of identifying problems that have occurred in your data so you can correct them. Let's explore how Frictionless helps to achieve this task using an invalid data table as an example:

+
+

Download capital-invalid.csv to reproduce the examples (right-click and "Save link as").

+
+ +
+
+
cat capital-invalid.csv
+
+ +
id,name,name
+1,London,Britain
+2,Berlin,Germany
+3,Paris,France
+4,Madrid,Spain
+5,Rome,Italy
+6,Zagreb,Croatia
+7,Athens,Greece
+8,Vienna,Austria
+8,Warsaw
+
+x,Tokio,Japan,review
+ +
+
+
with open('capital-invalid.csv') as file:
+    print(file.read())
+
+ +
id,name,name
+1,London,Britain
+2,Berlin,Germany
+3,Paris,France
+4,Madrid,Spain
+5,Rome,Italy
+6,Zagreb,Croatia
+7,Athens,Greece
+8,Vienna,Austria
+8,Warsaw
+
+x,Tokio,Japan,review
+ +
+

We can validate this file by using both the command-line interface and the high-level Python functions. Frictionless provides comprehensive error details so that errors can be understood by the user. Continue reading to learn about the validation process in detail.

+ +
+
+
frictionless validate capital-invalid.csv
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+                          dataset
+┏━━━━━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓
+┃ name            ┃ type  ┃ path                ┃ status  ┃
+┡━━━━━━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩
+│ capital-invalid │ table │ capital-invalid.csv │ INVALID │
+└─────────────────┴───────┴─────────────────────┴─────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+                                capital-invalid
+┏━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
+┃ Row  ┃ Field ┃ Type            ┃ Message                                     ┃
+┡━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
+│ None │ 3     │ duplicate-label │ Label "name" in the header at position "3"  │
+│      │       │                 │ is duplicated to a label: at position "2"   │
+│ 10   │ 3     │ missing-cell    │ Row at position "10" has a missing cell in  │
+│      │       │                 │ field "name2" at position "3"               │
+│ 11   │ None  │ blank-row       │ Row at position "11" is completely blank    │
+│ 12   │ 1     │ type-error      │ Type error in the cell "x" in row "12" and  │
+│      │       │                 │ field "id" at position "1": type is         │
+│      │       │                 │ "integer/default"                           │
+│ 12   │ 4     │ extra-cell      │ Row at position "12" has an extra value in  │
+│      │       │                 │ field at position "4"                       │
+└──────┴───────┴─────────────────┴─────────────────────────────────────────────┘
+ +
+
+
from frictionless import validate
+
+report = validate('capital-invalid.csv')
+print(report)
+
+ +
{'valid': False,
+ 'stats': {'tasks': 1, 'errors': 5, 'warnings': 0, 'seconds': 0.01},
+ 'warnings': [],
+ 'errors': [],
+ 'tasks': [{'name': 'capital-invalid',
+            'type': 'table',
+            'valid': False,
+            'place': 'capital-invalid.csv',
+            'labels': ['id', 'name', 'name'],
+            'stats': {'errors': 5,
+                      'warnings': 0,
+                      'seconds': 0.01,
+                      'md5': 'dcdeae358cfd50860c18d953e021f836',
+                      'sha256': '95cc611e3b2457447ce62721a9b79d1a063d82058fc144d6d2a8dda53f30c3a6',
+                      'bytes': 171,
+                      'fields': 3,
+                      'rows': 11},
+            'warnings': [],
+            'errors': [{'type': 'duplicate-label',
+                        'title': 'Duplicate Label',
+                        'description': 'Two columns in the header row have the '
+                                       'same value. Column names should be '
+                                       'unique.',
+                        'message': 'Label "name" in the header at position "3" '
+                                   'is duplicated to a label: at position "2"',
+                        'tags': ['#table', '#header', '#label'],
+                        'note': 'at position "2"',
+                        'labels': ['id', 'name', 'name'],
+                        'rowNumbers': [1],
+                        'label': 'name',
+                        'fieldName': 'name2',
+                        'fieldNumber': 3},
+                       {'type': 'missing-cell',
+                        'title': 'Missing Cell',
+                        'description': 'This row has less values compared to '
+                                       'the header row (the first row in the '
+                                       'data source). A key concept is that '
+                                       'all the rows in tabular data must have '
+                                       'the same number of columns.',
+                        'message': 'Row at position "10" has a missing cell in '
+                                   'field "name2" at position "3"',
+                        'tags': ['#table', '#row', '#cell'],
+                        'note': '',
+                        'cells': ['8', 'Warsaw'],
+                        'rowNumber': 10,
+                        'cell': '',
+                        'fieldName': 'name2',
+                        'fieldNumber': 3},
+                       {'type': 'blank-row',
+                        'title': 'Blank Row',
+                        'description': 'This row is empty. A row should '
+                                       'contain at least one value.',
+                        'message': 'Row at position "11" is completely blank',
+                        'tags': ['#table', '#row'],
+                        'note': '',
+                        'cells': [],
+                        'rowNumber': 11},
+                       {'type': 'type-error',
+                        'title': 'Type Error',
+                        'description': 'The value does not match the schema '
+                                       'type and format for this field.',
+                        'message': 'Type error in the cell "x" in row "12" and '
+                                   'field "id" at position "1": type is '
+                                   '"integer/default"',
+                        'tags': ['#table', '#row', '#cell'],
+                        'note': 'type is "integer/default"',
+                        'cells': ['x', 'Tokio', 'Japan', 'review'],
+                        'rowNumber': 12,
+                        'cell': 'x',
+                        'fieldName': 'id',
+                        'fieldNumber': 1},
+                       {'type': 'extra-cell',
+                        'title': 'Extra Cell',
+                        'description': 'This row has more values compared to '
+                                       'the header row (the first row in the '
+                                       'data source). A key concept is that '
+                                       'all the rows in tabular data must have '
+                                       'the same number of columns.',
+                        'message': 'Row at position "12" has an extra value in '
+                                   'field at position "4"',
+                        'tags': ['#table', '#row', '#cell'],
+                        'note': '',
+                        'cells': ['x', 'Tokio', 'Japan', 'review'],
+                        'rowNumber': 12,
+                        'cell': 'review',
+                        'fieldName': '',
+                        'fieldNumber': 4}]}]}
+ +
+

Validate Functions

+

The high-level interface for validating data provided by Frictionless is a set of validate functions:

+ +

On the command line, there is only one command, but there is a flag to adjust the behavior. It's useful when you have a file with an ambiguous type, for example, a JSON file containing data instead of metadata:

+ +
+
+
frictionless validate your-data.csv
+frictionless validate your-schema.yaml --type schema
+frictionless validate your-data.csv --type resource
+frictionless validate your-package.json --type package
+frictionless validate your-inquiry.yaml --type inquiry
+
+ +
+
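
In Python, the validate function accepts an equivalent type argument (a sketch; the file name is a placeholder):

from frictionless import validate

# Mirrors the CLI --type flag
report = validate('your-schema.yaml', type='schema')
print(report.valid)
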

As a reminder, in the Frictionless ecosystem, a resource is a single file, such as a data file, and a package is a set of files, such as a data file and a schema. This concept is described in more detail in the Introduction.

+

Validating a Schema

+

The Schema.validate_descriptor function is the only function validating solely metadata. To see this work, let's create an invalid table schema:

+ +
+
+
import yaml
+
+descriptor = {}
+descriptor['fields'] = 'bad' # must be a list
+with open('bad.schema.yaml', 'w') as file:
+    yaml.dump(descriptor, file)
+
+ +
+

And let's validate this schema:

+ +
+
+
frictionless validate bad.schema.yaml
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+                     dataset
+┏━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓
+┃ name       ┃ type ┃ path            ┃ status  ┃
+┡━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩
+│ bad.schema │ json │ bad.schema.yaml │ INVALID │
+└────────────┴──────┴─────────────────┴─────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+                                   bad.schema
+┏━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
+┃ Row  ┃ Field ┃ Type         ┃ Message                                        ┃
+┡━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
+│ None │ None  │ schema-error │ Schema is not valid: 'bad' is not of type      │
+│      │       │              │ 'array' at property 'fields'                   │
+└──────┴───────┴──────────────┴────────────────────────────────────────────────┘
+ +
+
+
from pprint import pprint
+from frictionless import validate
+
+report = validate('bad.schema.yaml')
+pprint(report)
+
+ +
{'valid': False,
+ 'stats': {'tasks': 1, 'errors': 1, 'warnings': 0, 'seconds': 0.001},
+ 'warnings': [],
+ 'errors': [],
+ 'tasks': [{'name': 'bad.schema',
+            'type': 'json',
+            'valid': False,
+            'place': 'bad.schema.yaml',
+            'labels': [],
+            'stats': {'errors': 1, 'warnings': 0, 'seconds': 0.001},
+            'warnings': [],
+            'errors': [{'type': 'schema-error',
+                        'title': 'Schema Error',
+                        'description': 'Provided schema is not valid.',
+                        'message': "Schema is not valid: 'bad' is not of type "
+                                   "'array' at property 'fields'",
+                        'tags': [],
+                        'note': "'bad' is not of type 'array' at property "
+                                "'fields'"}]}]}
+ +
+

We see that the schema is invalid and the error is displayed. Schema validation can be very useful when you work with different classes of tables and create schemas for them. Using this function will ensure that the metadata is valid.
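
You can also call Schema.validate_descriptor directly; here is a sketch, assuming it accepts a path or a dict descriptor and returns a Report:

from frictionless import Schema

report = Schema.validate_descriptor('bad.schema.yaml')
print(report.valid)  # expected: False for this schema
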

+

Validating a Resource

+

As was shown in the "Describing Data" guide, a resource is a container having both metadata and data. We need to create a resource descriptor and then we can validate it:

+ +
+
+
frictionless describe capital-invalid.csv > capital.resource.yaml
+
+ +
+
+
from frictionless import describe
+
+resource = describe('capital-invalid.csv')
+resource.to_yaml('capital.resource.yaml')
+
+ +
+

Note: this example uses YAML for the resource descriptor format, but Frictionless supports JSON as well.
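
For example, a sketch of writing the same descriptor as JSON instead:

from frictionless import describe

resource = describe('capital-invalid.csv')
resource.to_json('capital.resource.json')  # JSON instead of YAML
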

+

Let's now validate to ensure that we are getting the same result that we got without using a resource:

+ +
+
+
frictionless validate capital.resource.yaml
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+                          dataset
+┏━━━━━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓
+┃ name            ┃ type  ┃ path                ┃ status  ┃
+┡━━━━━━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩
+│ capital-invalid │ table │ capital-invalid.csv │ INVALID │
+└─────────────────┴───────┴─────────────────────┴─────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+                                capital-invalid
+┏━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
+┃ Row  ┃ Field ┃ Type            ┃ Message                                     ┃
+┡━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
+│ None │ 3     │ duplicate-label │ Label "name" in the header at position "3"  │
+│      │       │                 │ is duplicated to a label: at position "2"   │
+│ 10   │ 3     │ missing-cell    │ Row at position "10" has a missing cell in  │
+│      │       │                 │ field "name2" at position "3"               │
+│ 11   │ None  │ blank-row       │ Row at position "11" is completely blank    │
+│ 12   │ 1     │ type-error      │ Type error in the cell "x" in row "12" and  │
+│      │       │                 │ field "id" at position "1": type is         │
+│      │       │                 │ "integer/default"                           │
+│ 12   │ 4     │ extra-cell      │ Row at position "12" has an extra value in  │
+│      │       │                 │ field at position "4"                       │
+└──────┴───────┴─────────────────┴─────────────────────────────────────────────┘
+ +
+
+
from frictionless import validate
+
+report = validate('capital.resource.yaml')
+print(report)
+
+ +
{'valid': False,
+ 'stats': {'tasks': 1, 'errors': 5, 'warnings': 0, 'seconds': 0.005},
+ 'warnings': [],
+ 'errors': [],
+ 'tasks': [{'name': 'capital-invalid',
+            'type': 'table',
+            'valid': False,
+            'place': 'capital-invalid.csv',
+            'labels': ['id', 'name', 'name'],
+            'stats': {'errors': 5,
+                      'warnings': 0,
+                      'seconds': 0.005,
+                      'md5': 'dcdeae358cfd50860c18d953e021f836',
+                      'sha256': '95cc611e3b2457447ce62721a9b79d1a063d82058fc144d6d2a8dda53f30c3a6',
+                      'bytes': 171,
+                      'fields': 3,
+                      'rows': 11},
+            'warnings': [],
+            'errors': [{'type': 'duplicate-label',
+                        'title': 'Duplicate Label',
+                        'description': 'Two columns in the header row have the '
+                                       'same value. Column names should be '
+                                       'unique.',
+                        'message': 'Label "name" in the header at position "3" '
+                                   'is duplicated to a label: at position "2"',
+                        'tags': ['#table', '#header', '#label'],
+                        'note': 'at position "2"',
+                        'labels': ['id', 'name', 'name'],
+                        'rowNumbers': [1],
+                        'label': 'name',
+                        'fieldName': 'name2',
+                        'fieldNumber': 3},
+                       {'type': 'missing-cell',
+                        'title': 'Missing Cell',
+                        'description': 'This row has less values compared to '
+                                       'the header row (the first row in the '
+                                       'data source). A key concept is that '
+                                       'all the rows in tabular data must have '
+                                       'the same number of columns.',
+                        'message': 'Row at position "10" has a missing cell in '
+                                   'field "name2" at position "3"',
+                        'tags': ['#table', '#row', '#cell'],
+                        'note': '',
+                        'cells': ['8', 'Warsaw'],
+                        'rowNumber': 10,
+                        'cell': '',
+                        'fieldName': 'name2',
+                        'fieldNumber': 3},
+                       {'type': 'blank-row',
+                        'title': 'Blank Row',
+                        'description': 'This row is empty. A row should '
+                                       'contain at least one value.',
+                        'message': 'Row at position "11" is completely blank',
+                        'tags': ['#table', '#row'],
+                        'note': '',
+                        'cells': [],
+                        'rowNumber': 11},
+                       {'type': 'type-error',
+                        'title': 'Type Error',
+                        'description': 'The value does not match the schema '
+                                       'type and format for this field.',
+                        'message': 'Type error in the cell "x" in row "12" and '
+                                   'field "id" at position "1": type is '
+                                   '"integer/default"',
+                        'tags': ['#table', '#row', '#cell'],
+                        'note': 'type is "integer/default"',
+                        'cells': ['x', 'Tokio', 'Japan', 'review'],
+                        'rowNumber': 12,
+                        'cell': 'x',
+                        'fieldName': 'id',
+                        'fieldNumber': 1},
+                       {'type': 'extra-cell',
+                        'title': 'Extra Cell',
+                        'description': 'This row has more values compared to '
+                                       'the header row (the first row in the '
+                                       'data source). A key concept is that '
+                                       'all the rows in tabular data must have '
+                                       'the same number of columns.',
+                        'message': 'Row at position "12" has an extra value in '
+                                   'field at position "4"',
+                        'tags': ['#table', '#row', '#cell'],
+                        'note': '',
+                        'cells': ['x', 'Tokio', 'Japan', 'review'],
+                        'rowNumber': 12,
+                        'cell': 'review',
+                        'fieldName': '',
+                        'fieldNumber': 4}]}]}
+ +
+

Okay, why do we need to use a resource descriptor if the result is the same? The reason is that a descriptor packages metadata together with the data. Let's extend our resource descriptor to show how you can edit and validate metadata:

+ +
+
+
from frictionless import describe
+
+resource = describe('capital-invalid.csv')
+resource.add_defined('stats')  # TODO: fix and remove this line
+resource.stats.md5 = 'ae23c74693ca2d3f0e38b9ba3570775b' # this is a made-up, incorrect value
+resource.stats.bytes = 100 # this is wrong
+resource.to_yaml('capital.resource-bad.yaml')
+
+ +
+

We have added a few incorrect, made-up attributes to our resource descriptor as an example. Now, the validation below reports these errors in addition to all the errors we had before. This example shows how concepts like Data Resource can be extremely useful when working with data.

+ +
+
+
frictionless validate capital.resource-bad.yaml  # TODO: it should have 7 errors
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+                          dataset
+┏━━━━━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓
+┃ name            ┃ type  ┃ path                ┃ status  ┃
+┡━━━━━━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩
+│ capital-invalid │ table │ capital-invalid.csv │ INVALID │
+└─────────────────┴───────┴─────────────────────┴─────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+                                capital-invalid
+┏━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
+┃ Row  ┃ Field ┃ Type            ┃ Message                                     ┃
+┡━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
+│ None │ 3     │ duplicate-label │ Label "name" in the header at position "3"  │
+│      │       │                 │ is duplicated to a label: at position "2"   │
+│ 10   │ 3     │ missing-cell    │ Row at position "10" has a missing cell in  │
+│      │       │                 │ field "name2" at position "3"               │
+│ 11   │ None  │ blank-row       │ Row at position "11" is completely blank    │
+│ 12   │ 1     │ type-error      │ Type error in the cell "x" in row "12" and  │
+│      │       │                 │ field "id" at position "1": type is         │
+│      │       │                 │ "integer/default"                           │
+│ 12   │ 4     │ extra-cell      │ Row at position "12" has an extra value in  │
+│      │       │                 │ field at position "4"                       │
+└──────┴───────┴─────────────────┴─────────────────────────────────────────────┘
+ +
+
+
from frictionless import validate
+
+report = validate('capital.resource-bad.yaml')
+print(report)
+
+ +
{'valid': False,
+ 'stats': {'tasks': 1, 'errors': 5, 'warnings': 0, 'seconds': 0.005},
+ 'warnings': [],
+ 'errors': [],
+ 'tasks': [{'name': 'capital-invalid',
+            'type': 'table',
+            'valid': False,
+            'place': 'capital-invalid.csv',
+            'labels': ['id', 'name', 'name'],
+            'stats': {'errors': 5,
+                      'warnings': 0,
+                      'seconds': 0.005,
+                      'md5': 'dcdeae358cfd50860c18d953e021f836',
+                      'sha256': '95cc611e3b2457447ce62721a9b79d1a063d82058fc144d6d2a8dda53f30c3a6',
+                      'bytes': 171,
+                      'fields': 3,
+                      'rows': 11},
+            'warnings': [],
+            'errors': [{'type': 'duplicate-label',
+                        'title': 'Duplicate Label',
+                        'description': 'Two columns in the header row have the '
+                                       'same value. Column names should be '
+                                       'unique.',
+                        'message': 'Label "name" in the header at position "3" '
+                                   'is duplicated to a label: at position "2"',
+                        'tags': ['#table', '#header', '#label'],
+                        'note': 'at position "2"',
+                        'labels': ['id', 'name', 'name'],
+                        'rowNumbers': [1],
+                        'label': 'name',
+                        'fieldName': 'name2',
+                        'fieldNumber': 3},
+                       {'type': 'missing-cell',
+                        'title': 'Missing Cell',
+                        'description': 'This row has less values compared to '
+                                       'the header row (the first row in the '
+                                       'data source). A key concept is that '
+                                       'all the rows in tabular data must have '
+                                       'the same number of columns.',
+                        'message': 'Row at position "10" has a missing cell in '
+                                   'field "name2" at position "3"',
+                        'tags': ['#table', '#row', '#cell'],
+                        'note': '',
+                        'cells': ['8', 'Warsaw'],
+                        'rowNumber': 10,
+                        'cell': '',
+                        'fieldName': 'name2',
+                        'fieldNumber': 3},
+                       {'type': 'blank-row',
+                        'title': 'Blank Row',
+                        'description': 'This row is empty. A row should '
+                                       'contain at least one value.',
+                        'message': 'Row at position "11" is completely blank',
+                        'tags': ['#table', '#row'],
+                        'note': '',
+                        'cells': [],
+                        'rowNumber': 11},
+                       {'type': 'type-error',
+                        'title': 'Type Error',
+                        'description': 'The value does not match the schema '
+                                       'type and format for this field.',
+                        'message': 'Type error in the cell "x" in row "12" and '
+                                   'field "id" at position "1": type is '
+                                   '"integer/default"',
+                        'tags': ['#table', '#row', '#cell'],
+                        'note': 'type is "integer/default"',
+                        'cells': ['x', 'Tokio', 'Japan', 'review'],
+                        'rowNumber': 12,
+                        'cell': 'x',
+                        'fieldName': 'id',
+                        'fieldNumber': 1},
+                       {'type': 'extra-cell',
+                        'title': 'Extra Cell',
+                        'description': 'This row has more values compared to '
+                                       'the header row (the first row in the '
+                                       'data source). A key concept is that '
+                                       'all the rows in tabular data must have '
+                                       'the same number of columns.',
+                        'message': 'Row at position "12" has an extra value in '
+                                   'field at position "4"',
+                        'tags': ['#table', '#row', '#cell'],
+                        'note': '',
+                        'cells': ['x', 'Tokio', 'Japan', 'review'],
+                        'rowNumber': 12,
+                        'cell': 'review',
+                        'fieldName': '',
+                        'fieldNumber': 4}]}]}
+ +
+

Validating a Package

+

A package is a set of resources plus additional metadata. To showcase package validation we need one more tabular file:

+
+

Download capital-valid.csv to reproduce the examples (right-click and "Save link as").

+
+ +
+
+
cat capital-valid.csv
+
+ +
id,name
+1,London
+2,Berlin
+3,Paris
+4,Madrid
+5,Rome
+ +
+
+
with open('capital-valid.csv') as file:
+    print(file.read())
+
+ +
id,name
+1,London
+2,Berlin
+3,Paris
+4,Madrid
+5,Rome
+ +
+

Now let's describe and validate a package which contains the data files we have seen so far:

+ +
+
+
frictionless describe capital-*id.csv > capital.package.yaml
+frictionless validate capital.package.yaml
+
+ +
──────────────────────────────────── Tables ────────────────────────────────────
+                                    dataset
+┏━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
+┃ Row  ┃ Field ┃ Type          ┃ Message                                       ┃
+┡━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
+│ None │ None  │ package-error │ The data package has an error: cannot         │
+│      │       │               │ retrieve metadata "capital.package.yaml"      │
+│      │       │               │ because ""                                    │
+└──────┴───────┴───────────────┴───────────────────────────────────────────────┘
+ +
+
+
from frictionless import describe, validate
+
+# create package descriptor
+package = describe("capital-*id.csv")
+package.to_yaml("capital.package.yaml")
+# validate
+report = validate("capital.package.yaml")
+print(report)
+
+ +
{'valid': False,
+ 'stats': {'tasks': 2, 'errors': 5, 'warnings': 0, 'seconds': 0.011},
+ 'warnings': [],
+ 'errors': [],
+ 'tasks': [{'name': 'capital-invalid',
+            'type': 'table',
+            'valid': False,
+            'place': 'capital-invalid.csv',
+            'labels': ['id', 'name', 'name'],
+            'stats': {'errors': 5,
+                      'warnings': 0,
+                      'seconds': 0.005,
+                      'md5': 'dcdeae358cfd50860c18d953e021f836',
+                      'sha256': '95cc611e3b2457447ce62721a9b79d1a063d82058fc144d6d2a8dda53f30c3a6',
+                      'bytes': 171,
+                      'fields': 3,
+                      'rows': 11},
+            'warnings': [],
+            'errors': [{'type': 'duplicate-label',
+                        'title': 'Duplicate Label',
+                        'description': 'Two columns in the header row have the '
+                                       'same value. Column names should be '
+                                       'unique.',
+                        'message': 'Label "name" in the header at position "3" '
+                                   'is duplicated to a label: at position "2"',
+                        'tags': ['#table', '#header', '#label'],
+                        'note': 'at position "2"',
+                        'labels': ['id', 'name', 'name'],
+                        'rowNumbers': [1],
+                        'label': 'name',
+                        'fieldName': 'name2',
+                        'fieldNumber': 3},
+                       {'type': 'missing-cell',
+                        'title': 'Missing Cell',
+                        'description': 'This row has less values compared to '
+                                       'the header row (the first row in the '
+                                       'data source). A key concept is that '
+                                       'all the rows in tabular data must have '
+                                       'the same number of columns.',
+                        'message': 'Row at position "10" has a missing cell in '
+                                   'field "name2" at position "3"',
+                        'tags': ['#table', '#row', '#cell'],
+                        'note': '',
+                        'cells': ['8', 'Warsaw'],
+                        'rowNumber': 10,
+                        'cell': '',
+                        'fieldName': 'name2',
+                        'fieldNumber': 3},
+                       {'type': 'blank-row',
+                        'title': 'Blank Row',
+                        'description': 'This row is empty. A row should '
+                                       'contain at least one value.',
+                        'message': 'Row at position "11" is completely blank',
+                        'tags': ['#table', '#row'],
+                        'note': '',
+                        'cells': [],
+                        'rowNumber': 11},
+                       {'type': 'type-error',
+                        'title': 'Type Error',
+                        'description': 'The value does not match the schema '
+                                       'type and format for this field.',
+                        'message': 'Type error in the cell "x" in row "12" and '
+                                   'field "id" at position "1": type is '
+                                   '"integer/default"',
+                        'tags': ['#table', '#row', '#cell'],
+                        'note': 'type is "integer/default"',
+                        'cells': ['x', 'Tokio', 'Japan', 'review'],
+                        'rowNumber': 12,
+                        'cell': 'x',
+                        'fieldName': 'id',
+                        'fieldNumber': 1},
+                       {'type': 'extra-cell',
+                        'title': 'Extra Cell',
+                        'description': 'This row has more values compared to '
+                                       'the header row (the first row in the '
+                                       'data source). A key concept is that '
+                                       'all the rows in tabular data must have '
+                                       'the same number of columns.',
+                        'message': 'Row at position "12" has an extra value in '
+                                   'field at position "4"',
+                        'tags': ['#table', '#row', '#cell'],
+                        'note': '',
+                        'cells': ['x', 'Tokio', 'Japan', 'review'],
+                        'rowNumber': 12,
+                        'cell': 'review',
+                        'fieldName': '',
+                        'fieldNumber': 4}]},
+           {'name': 'capital-valid',
+            'type': 'table',
+            'valid': True,
+            'place': 'capital-valid.csv',
+            'labels': ['id', 'name'],
+            'stats': {'errors': 0,
+                      'warnings': 0,
+                      'seconds': 0.003,
+                      'md5': 'e7b6592a0a4356ba834e4bf1c8e8c7f8',
+                      'sha256': '04202244cbb3662b0f97bfa65adfad045724cbc8d798a7c0eb85533e9da40a5b',
+                      'bytes': 50,
+                      'fields': 2,
+                      'rows': 5},
+            'warnings': [],
+            'errors': []}]}
+ +
+

As we can see, the result is in a similar format to what we have already seen, and shows errors as we expected: we have one invalid resource and one valid resource.

+

Validating an Inquiry

+
+

The Inquiry is an advanced concept mostly used by software integrators. For example, under the hood, Frictionless Framework uses inquiries to implement client-server validation within the built-in API. Please skip this section if this information feels unnecessary for you.

+
+

An Inquiry is a declarative representation of a validation job. It gives you the ability to create, export, and share arbitrary validation jobs containing a set of individual validation tasks. Tasks in the Inquiry accept the same arguments, written in camelCase, as the corresponding validate functions.

+

Let's create an Inquiry that includes an individual file validation and a resource validation. In this example we will use the data file capital-valid.csv and the resource capital.resource.yaml, which describes the invalid data file we have already seen:

+ +
+
+
from frictionless import Inquiry, InquiryTask
+
+inquiry = Inquiry(tasks=[
+    InquiryTask(path='capital-valid.csv'),
+    InquiryTask(resource='capital.resource.yaml'),
+])
+inquiry.to_yaml('capital.inquiry.yaml')
+
+ +
+

As usual, let's run validation:

+ +
+
+
frictionless validate capital.inquiry.yaml
+
+ +
─────────────────────────────────── Dataset ────────────────────────────────────
+                          dataset
+┏━━━━━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓
+┃ name            ┃ type  ┃ path                ┃ status  ┃
+┡━━━━━━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩
+│ capital-valid   │ table │ capital-valid.csv   │ VALID   │
+│ capital-invalid │ table │ capital-invalid.csv │ INVALID │
+└─────────────────┴───────┴─────────────────────┴─────────┘
+──────────────────────────────────── Tables ────────────────────────────────────
+                                capital-invalid
+┏━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
+┃ Row  ┃ Field ┃ Type            ┃ Message                                     ┃
+┡━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
+│ None │ 3     │ duplicate-label │ Label "name" in the header at position "3"  │
+│      │       │                 │ is duplicated to a label: at position "2"   │
+│ 10   │ 3     │ missing-cell    │ Row at position "10" has a missing cell in  │
+│      │       │                 │ field "name2" at position "3"               │
+│ 11   │ None  │ blank-row       │ Row at position "11" is completely blank    │
+│ 12   │ 1     │ type-error      │ Type error in the cell "x" in row "12" and  │
+│      │       │                 │ field "id" at position "1": type is         │
+│      │       │                 │ "integer/default"                           │
+│ 12   │ 4     │ extra-cell      │ Row at position "12" has an extra value in  │
+│      │       │                 │ field at position "4"                       │
+└──────┴───────┴─────────────────┴─────────────────────────────────────────────┘
+ +
+
+
from frictionless import validate
+
+report = validate("capital.inquiry.yaml")
+print(report)
+
+ +
{'valid': False,
+ 'stats': {'tasks': 2, 'errors': 5, 'warnings': 0, 'seconds': 0.016},
+ 'warnings': [],
+ 'errors': [],
+ 'tasks': [{'name': 'capital-valid',
+            'type': 'table',
+            'valid': True,
+            'place': 'capital-valid.csv',
+            'labels': ['id', 'name'],
+            'stats': {'errors': 0,
+                      'warnings': 0,
+                      'seconds': 0.007,
+                      'md5': 'e7b6592a0a4356ba834e4bf1c8e8c7f8',
+                      'sha256': '04202244cbb3662b0f97bfa65adfad045724cbc8d798a7c0eb85533e9da40a5b',
+                      'bytes': 50,
+                      'fields': 2,
+                      'rows': 5},
+            'warnings': [],
+            'errors': []},
+           {'name': 'capital-invalid',
+            'type': 'table',
+            'valid': False,
+            'place': 'capital-invalid.csv',
+            'labels': ['id', 'name', 'name'],
+            'stats': {'errors': 5,
+                      'warnings': 0,
+                      'seconds': 0.005,
+                      'md5': 'dcdeae358cfd50860c18d953e021f836',
+                      'sha256': '95cc611e3b2457447ce62721a9b79d1a063d82058fc144d6d2a8dda53f30c3a6',
+                      'bytes': 171,
+                      'fields': 3,
+                      'rows': 11},
+            'warnings': [],
+            'errors': [{'type': 'duplicate-label',
+                        'title': 'Duplicate Label',
+                        'description': 'Two columns in the header row have the '
+                                       'same value. Column names should be '
+                                       'unique.',
+                        'message': 'Label "name" in the header at position "3" '
+                                   'is duplicated to a label: at position "2"',
+                        'tags': ['#table', '#header', '#label'],
+                        'note': 'at position "2"',
+                        'labels': ['id', 'name', 'name'],
+                        'rowNumbers': [1],
+                        'label': 'name',
+                        'fieldName': 'name2',
+                        'fieldNumber': 3},
+                       {'type': 'missing-cell',
+                        'title': 'Missing Cell',
+                        'description': 'This row has less values compared to '
+                                       'the header row (the first row in the '
+                                       'data source). A key concept is that '
+                                       'all the rows in tabular data must have '
+                                       'the same number of columns.',
+                        'message': 'Row at position "10" has a missing cell in '
+                                   'field "name2" at position "3"',
+                        'tags': ['#table', '#row', '#cell'],
+                        'note': '',
+                        'cells': ['8', 'Warsaw'],
+                        'rowNumber': 10,
+                        'cell': '',
+                        'fieldName': 'name2',
+                        'fieldNumber': 3},
+                       {'type': 'blank-row',
+                        'title': 'Blank Row',
+                        'description': 'This row is empty. A row should '
+                                       'contain at least one value.',
+                        'message': 'Row at position "11" is completely blank',
+                        'tags': ['#table', '#row'],
+                        'note': '',
+                        'cells': [],
+                        'rowNumber': 11},
+                       {'type': 'type-error',
+                        'title': 'Type Error',
+                        'description': 'The value does not match the schema '
+                                       'type and format for this field.',
+                        'message': 'Type error in the cell "x" in row "12" and '
+                                   'field "id" at position "1": type is '
+                                   '"integer/default"',
+                        'tags': ['#table', '#row', '#cell'],
+                        'note': 'type is "integer/default"',
+                        'cells': ['x', 'Tokio', 'Japan', 'review'],
+                        'rowNumber': 12,
+                        'cell': 'x',
+                        'fieldName': 'id',
+                        'fieldNumber': 1},
+                       {'type': 'extra-cell',
+                        'title': 'Extra Cell',
+                        'description': 'This row has more values compared to '
+                                       'the header row (the first row in the '
+                                       'data source). A key concept is that '
+                                       'all the rows in tabular data must have '
+                                       'the same number of columns.',
+                        'message': 'Row at position "12" has an extra value in '
+                                   'field at position "4"',
+                        'tags': ['#table', '#row', '#cell'],
+                        'note': '',
+                        'cells': ['x', 'Tokio', 'Japan', 'review'],
+                        'rowNumber': 12,
+                        'cell': 'review',
+                        'fieldName': '',
+                        'fieldNumber': 4}]}]}
+ +
+

At first sight, it might not be clear why such a construct exists, but when your validation workflow gets complex, the Inquiry can provide a lot of flexibility and power.

+
+

The Inquiry will use multiprocessing if the parallel flag is provided. This can speed up your validation dramatically, especially on a processor with 4+ cores.
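+
+A minimal sketch of that pattern (assuming the capital-valid.csv and capital-invalid.csv files used in this guide, and that inquiry.validate accepts the parallel flag described above):
+
+from frictionless import Inquiry
+
+inquiry = Inquiry.from_descriptor({
+    "tasks": [
+        {"path": "capital-valid.csv"},
+        {"path": "capital-invalid.csv"},
+    ],
+})
+# with parallel=True each task is validated in its own process
+report = inquiry.validate(parallel=True)
+print(report.valid)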

+
+

Validation Report

+

All the validate functions return a Validation Report. This is a unified object containing information about a validation: source details, found errors, etc. Let's explore a report:

+ +
+
+
from frictionless import validate
+
+report = validate('capital-invalid.csv', pick_errors=['duplicate-label'])
+print(report)
+
+ +
{'valid': False,
+ 'stats': {'tasks': 1, 'errors': 1, 'warnings': 0, 'seconds': 0.01},
+ 'warnings': [],
+ 'errors': [],
+ 'tasks': [{'name': 'capital-invalid',
+            'type': 'table',
+            'valid': False,
+            'place': 'capital-invalid.csv',
+            'labels': ['id', 'name', 'name'],
+            'stats': {'errors': 1,
+                      'warnings': 0,
+                      'seconds': 0.01,
+                      'md5': 'dcdeae358cfd50860c18d953e021f836',
+                      'sha256': '95cc611e3b2457447ce62721a9b79d1a063d82058fc144d6d2a8dda53f30c3a6',
+                      'bytes': 171,
+                      'fields': 3,
+                      'rows': 11},
+            'warnings': [],
+            'errors': [{'type': 'duplicate-label',
+                        'title': 'Duplicate Label',
+                        'description': 'Two columns in the header row have the '
+                                       'same value. Column names should be '
+                                       'unique.',
+                        'message': 'Label "name" in the header at position "3" '
+                                   'is duplicated to a label: at position "2"',
+                        'tags': ['#table', '#header', '#label'],
+                        'note': 'at position "2"',
+                        'labels': ['id', 'name', 'name'],
+                        'rowNumbers': [1],
+                        'label': 'name',
+                        'fieldName': 'name2',
+                        'fieldNumber': 3}]}]}
+ +
+

As we can see, there is a lot of information; you can find a detailed description of the Validation Report in the API Reference. Errors are grouped by tasks (i.e. data files); for some validations there can be dozens of tasks. Let's use the report.flatten function to simplify the representation of errors. This function represents a report as a list of errors:

+ +
+
+
from pprint import pprint
+from frictionless import validate
+
+report = validate("capital-invalid.csv", pick_errors=["duplicate-label"])
+pprint(report.flatten(["rowNumber", "fieldNumber", "type", "message"]))
+
+ +
[[None,
+  3,
+  'duplicate-label',
+  'Label "name" in the header at position "3" is duplicated to a label: at '
+  'position "2"']]
+ +
+

In some situations, an error can't be associated with a task; then it goes to the top-level report.errors property:

+ +
+
+
from frictionless import validate
+
+report = validate("bad.json", type='schema')
+print(report)
+
+ +
{'valid': False,
+ 'stats': {'tasks': 1, 'errors': 1, 'warnings': 0, 'seconds': 0.0},
+ 'warnings': [],
+ 'errors': [],
+ 'tasks': [{'name': 'bad',
+            'type': 'json',
+            'valid': False,
+            'place': 'bad.json',
+            'labels': [],
+            'stats': {'errors': 1, 'warnings': 0, 'seconds': 0.0},
+            'warnings': [],
+            'errors': [{'type': 'schema-error',
+                        'title': 'Schema Error',
+                        'description': 'Provided schema is not valid.',
+                        'message': 'Schema is not valid: cannot retrieve '
+                                   'metadata "bad.json" because "[Errno 2] No '
+                                   'such file or directory: \'bad.json\'"',
+                        'tags': [],
+                        'note': 'cannot retrieve metadata "bad.json" because '
+                                '"[Errno 2] No such file or directory: '
+                                '\'bad.json\'"'}]}]}
+ +
+

Validation Errors

+

The Error object is at the heart of the validation process. The Report has report.errors and report.tasks[].errors properties that can contain Error objects. Let's explore it by taking a deeper look at the duplicate-label error:

+ +
+
+
from frictionless import validate
+
+report = validate("capital-invalid.csv", pick_errors=["duplicate-label"])
+error = report.error  # this is only available for a one-table/one-error situation
+print(f'Type: "{error.type}"')
+print(f'Title: "{error.title}"')
+print(f'Tags: "{error.tags}"')
+print(f'Note: "{error.note}"')
+print(f'Message: "{error.message}"')
+print(f'Description: "{error.description}"')
+
+ +
Type: "duplicate-label"
+Title: "Duplicate Label"
+Tags: "['#table', '#header', '#label']"
+Note: "at position "2""
+Message: "Label "name" in the header at position "3" is duplicated to a label: at position "2""
+Description: "Two columns in the header row have the same value. Column names should be unique."
+ +
+

Above, we have listed universal error properties. Depending on the type of an error there can be additional ones. For example, for our duplicate-label error:

+ +
+
+
from frictionless import validate
+
+report = validate("capital-invalid.csv", pick_errors=["duplicate-label"])
+error = report.error  # this is only available for a one-table/one-error situation
+print(error)
+
+ +
{'type': 'duplicate-label',
+ 'title': 'Duplicate Label',
+ 'description': 'Two columns in the header row have the same value. Column '
+                'names should be unique.',
+ 'message': 'Label "name" in the header at position "3" is duplicated to a '
+            'label: at position "2"',
+ 'tags': ['#table', '#header', '#label'],
+ 'note': 'at position "2"',
+ 'labels': ['id', 'name', 'name'],
+ 'rowNumbers': [1],
+ 'label': 'name',
+ 'fieldName': 'name2',
+ 'fieldNumber': 3}
+ +
+
+
+

Please explore the Errors Reference to learn about all the available errors and their properties.

+

Available Checks

+

There are various validation checks included in the core Frictionless Framework along with an ability to create custom checks. See Validation Checks for a list of available checks.

+ +
+
+
from pprint import pprint
+from frictionless import validate, checks
+
+checks = [checks.sequential_value(field_name='id')]
+report = validate('capital-invalid.csv', checks=checks)
+pprint(report.flatten(["rowNumber", "fieldNumber", "type", "note"]))
+
+ +
[[None, 3, 'duplicate-label', 'at position "2"'],
+ [10, 3, 'missing-cell', ''],
+ [10, 1, 'sequential-value', 'the value is not sequential'],
+ [11, None, 'blank-row', ''],
+ [12, 1, 'type-error', 'type is "integer/default"'],
+ [12, 4, 'extra-cell', '']]
+ +
+
+
+
+

Note that only the Baseline Check is enabled by default. Other built-in checks need to be activated explicitly, as shown above.

+
+

Custom Checks

+

There are many cases when the built-in Frictionless checks are not enough. For instance, you might want to encode a business logic rule or a specific quality requirement for the data. With Frictionless it's very easy to use your own custom checks. Let's look at an example:

+ +
+
+
from pprint import pprint
+from frictionless import Check, validate, errors
+
+# Create check
+class forbidden_two(Check):
+    Errors = [errors.CellError]
+    def validate_row(self, row):
+        if row['header'] == 2:
+            note = '2 is forbidden!'
+            yield errors.CellError.from_row(row, note=note, field_name='header')
+
+# Validate table
+source = b'header\n1\n2\n3'
+report = validate(source,  format='csv', checks=[forbidden_two()])
+pprint(report.flatten(["rowNumber", "fieldNumber", "type", "note"]))
+
+ +
[[3, 1, 'cell-error', '2 is forbidden!']]
+ +
+

Usually, it also makes sense to create a custom error for your custom check. The Check class provides other useful methods like validate_header; please read the API Reference for more details.
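+
+As a hedged sketch of that pattern (the class attributes below mirror the built-in errors; the ForbiddenValueError name and its attribute values are illustrative, not part of the framework):
+
+from frictionless import Check, errors
+
+# illustrative custom error, following the shape of the built-in cell errors
+class ForbiddenValueError(errors.CellError):
+    type = "forbidden-value"
+    title = "Forbidden Value"
+    description = "The cell contains a value that is not allowed."
+    template = 'Forbidden value in field "{fieldName}": {note}'
+
+class forbidden_two(Check):
+    Errors = [ForbiddenValueError]
+
+    def validate_row(self, row):
+        if row["header"] == 2:
+            yield ForbiddenValueError.from_row(
+                row, note="2 is forbidden!", field_name="header"
+            )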

+

Learn more about custom checks in the Check Guide.

+

Pick/Skip Errors

+

We can pick or skip errors by providing a list of error types. This is useful when you already know your data has some errors, but you want to ignore them for now, for instance when a data table has repeating header names. Let's see an example of how to pick and skip errors:

+ +
+
+
from pprint import pprint
+from frictionless import validate
+
+report1 = validate("capital-invalid.csv", pick_errors=["duplicate-label"])
+report2 = validate("capital-invalid.csv", skip_errors=["duplicate-label"])
+pprint(report1.flatten(["rowNumber", "fieldNumber", "type"]))
+pprint(report2.flatten(["rowNumber", "fieldNumber", "type"]))
+
+ +
[[None, 3, 'duplicate-label']]
+[[10, 3, 'missing-cell'],
+ [11, None, 'blank-row'],
+ [12, 1, 'type-error'],
+ [12, 4, 'extra-cell']]
+ +
+

It's also possible to use error tags (for more information please consult the Errors Reference):

+ +
+
+
from pprint import pprint
+from frictionless import validate
+
+report1 = validate("capital-invalid.csv", pick_errors=["#header"])
+report2 = validate("capital-invalid.csv", skip_errors=["#row"])
+pprint(report1.flatten(["rowNumber", "fieldNumber", "type"]))
+pprint(report2.flatten(["rowNumber", "fieldNumber", "type"]))
+
+ +
[[None, 3, 'duplicate-label']]
+[[None, 3, 'duplicate-label']]
+ +
+

Limit Errors

+

This option allows you to limit the number of errors, and can be used when you need to do a quick check or want to "fail fast". For instance, here we use limit_errors to find just the first error and add it to our report:

+
from pprint import pprint
+from frictionless import validate
+
+report = validate("capital-invalid.csv", limit_errors=1)
+pprint(report.flatten(["rowNumber", "fieldNumber", "type"]))
+
+
[[None, 3, 'duplicate-label']]
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/portals/ckan.html b/docs/portals/ckan.html new file mode 100644 index 0000000000..b5799907cd --- /dev/null +++ b/docs/portals/ckan.html @@ -0,0 +1,3916 @@ + + + + + + + + +Ckan Portal | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Ckan Portal

+

With the CKAN portal feature you can load and publish packages from CKAN, an open-source data management system.

+

Installation

+

To install this plugin, run:

+ +
+
+
pip install frictionless[ckan] --pre
+pip install 'frictionless[ckan]' --pre # for zsh shell
+
+ +
+

Reading a Package

+

To import a Dataset from a CKAN instance as a Frictionless Package you can do +as below:

+ +
+
+
from frictionless.portals import CkanControl
+from frictionless import Package
+
+ckan_control = CkanControl()
+package = Package('https://legado.dados.gov.br/dataset/bolsa-familia-pagamentos', control=ckan_control)
+
+ +
+

Where 'https://legado.dados.gov.br/dataset/bolsa-familia-pagamentos' is the URL of the CKAN dataset. This will download the dataset and the metadata of all its resources.

+

You can pass parameters to CKAN Control to configure it, like the CKAN instance base URL (baseurl) and the dataset that you want to download (dataset):

+ +
+
+
from frictionless.portals import CkanControl
+from frictionless import Package
+
+ckan_control = CkanControl(baseurl='https://legado.dados.gov.br', dataset='bolsa-familia-pagamentos')
+package = Package(control=ckan_control)
+
+ +
+

You don't need to pass the dataset parameter to CkanControl. If you pass only the baseurl, you can download a package as:

+ +
+
+
from frictionless.portals import CkanControl
+from frictionless import Package
+
+ckan_control = CkanControl(baseurl='https://legado.dados.gov.br')
+package = Package('bolsa-familia-pagamentos', control=ckan_control)
+
+ +
+

Ignoring a Resource Schema

+

If the CKAN dataset has a resource containing errors in its schema, you can still load the package by passing the parameter ignore_schema=True to CKAN Control:

+ +
+
+
from frictionless.portals import CkanControl
+from frictionless import Package
+
+ckan_control = CkanControl(baseurl='https://legado.dados.gov.br', ignore_schema=True)
+package = Package('bolsa-familia-pagamentos', control=ckan_control)
+
+ +
+

This will download the dataset and all its resources, saving the resources' original schemas in original_schema.

+

Publishing a package

+

To publish a Package to a CKAN instance you will need an API key from a CKAN user with permission to create datasets. This key can be passed to CKAN Control as the parameter apikey.

+ +
+
+
from frictionless.portals import CkanControl
+from frictionless import Package
+
+ckan_control = CkanControl(baseurl='https://legado.dados.gov.br', apikey='YOUR-SECRET-API-KEY')
+package = Package(...) # Create your package
+package.publish(control=ckan_control)
+
+ +
+

Reading a Catalog

+

You can download a list of CKAN datasets using the Catalog.

+ +
+
+

+import frictionless
+from frictionless import portals, Catalog
+
+ckan_control = portals.CkanControl(baseurl='https://legado.dados.gov.br')
+c = Catalog(control=ckan_control)
+
+ +
+

This will download all datasets from the instance, limited only by the maximum number of datasets returned by the instance's CKAN API. If the instance returns only 10 datasets by default, you can request more packages by passing the parameter num_packages. Building on the example above, if you want to download 1000 datasets you can do:

+ +
+
+

+import frictionless
+from frictionless import portals, Catalog
+
+ckan_control = portals.CkanControl(baseurl='https://legado.dados.gov.br', num_packages=1000)
+c = Catalog(control=ckan_control)
+
+ +
+

When you are requesting a large number of packages from CKAN, it's possible that some of them don't have a valid Package descriptor according to the specifications. In that case the standard behaviour is to stop downloading and raise an exception. If you want to ignore individual package errors, you can pass the parameter ignore_package_errors=True:

+ +
+
+

+import frictionless
+from frictionless import portals, Catalog
+
+ckan_control = portals.CkanControl(baseurl='https://legado.dados.gov.br', ignore_package_errors=True, num_packages=1000)
+c = Catalog(control=ckan_control)
+
+ +
+

The output of the command above lists the ids of the CKAN datasets with errors and the total number of packages returned by your query to the CKAN instance:

+
Error in CKAN dataset 8d60eff7-1a46-42ef-be64-e8979117a378: [package-error] The data package has an error: descriptor is not valid (The data package has an error: property "contributors[].email" is not valid "email")
+Error in CKAN dataset 933d7164-8128-4e12-97e6-208bc4935bcb: [package-error] The data package has an error: descriptor is not valid (The data package has an error: property "contributors[].email" is not valid "email")
+Error in CKAN dataset 93114fec-01c2-4ef5-8dfe-67da5027d568: [package-error] The data package has an error: descriptor is not valid (The data package has an error: property "contributors[].email" is not valid "email") (The data package has an error: property "contributors[].email" is not valid "email")
+Total number of packages: 13786
+
+

You can see in the example above that 1000 packages were downloaded from a total of 13786 packages. You can download further packages by passing an offset:

+ +
+
+

+import frictionless
+from frictionless import portals, Catalog
+
+ckan_control = portals.CkanControl(baseurl='https://legado.dados.gov.br', ignore_package_errors=True, results_offset=1000)
+c = Catalog(control=ckan_control)
+
+ +
+

This will download the next 1000 packages, after the first 1000.
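+
+A minimal sketch of that paging pattern (using only the num_packages and results_offset parameters documented on this page; the chunk size and total are illustrative):
+
+import frictionless
+from frictionless import portals, Catalog
+
+packages = []
+for offset in range(0, 3000, 1000):
+    control = portals.CkanControl(
+        baseurl='https://legado.dados.gov.br',
+        ignore_package_errors=True,
+        num_packages=1000,
+        results_offset=offset,
+    )
+    # each catalog holds one page of up to 1000 packages
+    packages.extend(Catalog(control=control).packages)
+print(len(packages))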

+

Fetching the datasets from an Organization or Group

+

To fetch all packages from an organization you can use the CKAN Control parameter organization_name. For example, if you want to fetch all datasets from the organization https://legado.dados.gov.br/organization/agencia-espacial-brasileira-aeb you can do as follows:

+ +
+
+
import frictionless
+from frictionless import portals, Catalog
+
+ckan_control = portals.CkanControl(baseurl='https://legado.dados.gov.br', organization_name='agencia-espacial-brasileira-aeb')
+c = Catalog(control=ckan_control)
+
+ +
+

Similarly, if you want to download all datasets from a CKAN Group you can pass +the parameter group_id to the CKAN Control as:

+ +
+
+
import frictionless
+from frictionless import portals, Catalog
+
+ckan_control = portals.CkanControl(baseurl='https://legado.dados.gov.br', group_id='ciencia-informacao-e-comunicacao')
+c = Catalog(control=ckan_control)
+
+ +
+

Using CKAN search

+

You can also fetch only the datasets that are returned by the CKAN Package +Search endpoint. +You can pass the search parameters as the parameter search to CKAN Control.

+ +
+
+
import frictionless
+from frictionless import portals, Catalog
+
+ckan_control = portals.CkanControl(baseurl='https://legado.dados.gov.br', search={'q': 'name:bolsa*'})
+c = Catalog(control=ckan_control)
+
+ +
+

Reference

+
+ + +
+
+ +

portals.CkanControl (class)

+ +
+
+ + +
+

portals.CkanControl (class)

+

Ckan control representation

+

Signature

+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, baseurl: Optional[str] = None, dataset: Optional[str] = None, apikey: Optional[str] = None, ignore_package_errors: Optional[bool] = False, ignore_schema: Optional[bool] = False, group_id: Optional[str] = None, organization_name: Optional[str] = None, search: Optional[Dict[str, Any]] = None, num_packages: Optional[int] = None, results_offset: Optional[int] = None, allow_update: Optional[bool] = False) -> None

+

Parameters

+
    +
  • + name + (Optional[str])
  • +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + baseurl + (Optional[str])
  • +
  • + dataset + (Optional[str])
  • +
  • + apikey + (Optional[str])
  • +
  • + ignore_package_errors + (Optional[bool])
  • +
  • + ignore_schema + (Optional[bool])
  • +
  • + group_id + (Optional[str])
  • +
  • + organization_name + (Optional[str])
  • +
  • + search + (Optional[Dict[str, Any]])
  • +
  • + num_packages + (Optional[int])
  • +
  • + results_offset + (Optional[int])
  • +
  • + allow_update + (Optional[bool])
  • +
+
+ +
+

portals.ckanControl.baseurl (property)

+

+ Endpoint url for CKAN instance. e.g. https://dados.gov.br +

+

Signature

+

Optional[str]

+
+
+

portals.ckanControl.dataset (property)

+

+ Unique identifier of the dataset to read or write. +

+

Signature

+

Optional[str]

+
+
+

portals.ckanControl.apikey (property)

+

+ The access token to authenticate to the CKAN instance. It is required + to write files to CKAN instance. +

+

Signature

+

Optional[str]

+
+
+

portals.ckanControl.ignore_package_errors (property)

+

+ Ignore Package errors in a Catalog. If multiple packages are being downloaded + and one fails with an invalid descriptor, continue downloading the rest. +

+

Signature

+

Optional[bool]

+
+
+

portals.ckanControl.ignore_schema (property)

+

+ Ignore dataset resources schemas +

+

Signature

+

Optional[bool]

+
+
+

portals.ckanControl.group_id (property)

+

+ CKAN Group id to get datasets in a Catalog +

+

Signature

+

Optional[str]

+
+
+

portals.ckanControl.organization_name (property)

+

+ CKAN Organization name to get datasets in a Catalog +

+

Signature

+

Optional[str]

+
+
+

portals.ckanControl.search (property)

+

+ CKAN Search parameters as defined on https://docs.ckan.org/en/2.9/api/#ckan.logic.action.get.package_search +

+

Signature

+

Optional[Dict[str, Any]]

+
+
+

portals.ckanControl.num_packages (property)

+

+ Maximum number of packages to fetch +

+

Signature

+

Optional[int]

+
+
+

portals.ckanControl.results_offset (property)

+

+ Results page number +

+

Signature

+

Optional[int]

+
+
+

portals.ckanControl.allow_update (property)

+

+ Update a dataset on publish if an id is provided in the package descriptor +

+

Signature

+

Optional[bool]

+
+ + + + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/portals/github.html b/docs/portals/github.html new file mode 100644 index 0000000000..252c5a23e3 --- /dev/null +++ b/docs/portals/github.html @@ -0,0 +1,4058 @@ + + + + + + + + +Github Portal | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Github Portal

+

The Github read and publish feature makes it easy to share data between frictionless and GitHub repositories. All read/write functionality is a wrapper around the PyGithub library, which is used under the hood to connect to the GitHub API.

+

Installation

+

We need to install github extra dependencies to use this feature:

+ +
+
+
pip install frictionless[github] --pre
+pip install 'frictionless[github]' --pre # for zsh shell
+
+ +
+

Reading Package

+

You can read data from a github repository as follows:

+ +
+
+
from pprint import pprint
+from frictionless import portals, Package
+
+package = Package("https://github.com/fdtester/test-repo-without-datapackage")
+print(package)
+
+ +
+
{'name': 'test-repo-without-datapackage',
+ 'resources': [{'name': 'capitals',
+                'type': 'table',
+                'path': 'https://raw.githubusercontent.com/fdtester/test-repo-without-datapackage/master/data/capitals.csv',
+                'scheme': 'https',
+                'format': 'csv',
+                'mediatype': 'text/csv'},
+               {'name': 'countries',
+                'type': 'table',
+                'path': 'https://raw.githubusercontent.com/fdtester/test-repo-without-datapackage/master/data/countries.csv',
+                'scheme': 'https',
+                'format': 'csv',
+                'mediatype': 'text/csv'},
+               {'name': 'student',
+                'type': 'table',
+                'path': 'https://raw.githubusercontent.com/fdtester/test-repo-without-datapackage/master/data/student.xlsx',
+                'scheme': 'https',
+                'format': 'xlsx',
+                'mediatype': 'application/vnd.ms-excel'}]}
+
+

You can also use the alias function instead, for example:

+ +
+
+
from pprint import pprint
+from frictionless import portals, Package
+
+package = Package("https://github.com/fdtester/test-repo-without-datapackage")
+print(package)
+
+ +
+

To increase the access limit, pass 'apikey' as the param to the reader function as follows:

+ +
+
+
from pprint import pprint
+from frictionless import portals, Package
+
+control = portals.GithubControl(apikey=apikey)
+package = Package("https://github.com/fdtester/test-repo-without-datapackage", control=control)
+print(package)
+
+ +
+

The reader function can read a package from repos with or without a data package descriptor. If the repo does not have a descriptor, one is created with the same name as the repo, as shown in the example above. By default, the function reads files of type csv, xlsx and xls, but we can set the file types using control parameters.

+

If the repo has a descriptor, it simply returns the descriptor, as shown below:

+ +
+
+
from pprint import pprint
+from frictionless import portals, Package
+
+package = Package("https://https://github.com/fdtester/test-repo-with-datapackage-json")
+
+ +
+
+{'name': 'test-tabulator',
+ 'resources': [{'name': 'first-resource',
+                'path': 'table.xls',
+                'schema': {'fields': [{'name': 'id', 'type': 'number'},
+                                      {'name': 'name', 'type': 'string'}]}},
+               {'name': 'number-two',
+                'path': 'table-reverse.csv',
+                'schema': {'fields': [{'name': 'id', 'type': 'integer'},
+                                      {'name': 'name', 'type': 'string'}]}}]}
+
+

Once you read the package from the repo, you can then easily access the resources and their data, for example:

+ +
+
+
from pprint import pprint
+from frictionless import portals, Package
+
+package = Package("https://github.com/fdtester/test-repo-without-datapackage")
+pprint(package.get_resource('capitals').read_rows())
+
+ +
+
[{'id': 1, 'cid': 1, 'name': 'London'},
+ {'id': 2, 'cid': 2, 'name': 'Paris'},
+ {'id': 3, 'cid': 3, 'name': 'Berlin'},
+ {'id': 4, 'cid': 4, 'name': 'Rome'},
+ {'id': 5, 'cid': 5, 'name': 'Lisbon'}]
+
+

Reading Catalog

+

Catalog is a container for the packages. We can read single/multiple repositories from github and create a catalog.

+ +
+
+
from pprint import pprint
+from frictionless import portals, Catalog
+
+control = portals.GithubControl(search="'TestAction: Read' in:readme", apikey=apikey)
+catalog = Catalog(
+        "https://github.com/fdtester", control=control
+    )
+print("Total packages", len(catalog.packages))
+print(catalog.packages[:2])
+
+ +
+
Total packages 4
+[{'resources': [{'name': 'capitals',
+                'type': 'table',
+                'path': 'data/capitals.csv',
+                'scheme': 'file',
+                'format': 'csv',
+                'encoding': 'utf-8',
+                'mediatype': 'text/csv',
+                'dialect': {'csv': {'skipInitialSpace': True}},
+                'schema': {'fields': [{'name': 'id', 'type': 'integer'},
+                                      {'name': 'cid', 'type': 'integer'},
+                                      {'name': 'name', 'type': 'string'}]}}]},
+ {'name': 'test-repo-jquery',
+ 'resources': [{'name': 'country-1',
+                'type': 'table',
+                'path': 'https://raw.githubusercontent.com/fdtester/test-repo-jquery/main/country-1.csv',
+                'scheme': 'https',
+                'format': 'csv',
+                'mediatype': 'text/csv'}]}]
+
+

To read a catalog, we need an authenticated user, so we have to pass the token as 'apikey' to the function. In the above example we are using search text to filter the repositories to a small number. The search field is not mandatory.

+

We can simply use 'control' parameters and get the same result as above, for example:

+ +
+
+
from pprint import pprint
+from frictionless import portals, Catalog
+
+control = portals.GithubControl(search="'TestAction: Read' in:readme", user="fdtester", apikey=apikey)
+catalog = Catalog(control=control)
+print("Total packages", len(catalog.packages))
+print(catalog.packages[:2])
+
+ +
+

As shown in the example above, we can use different qualifiers to search the repos. The above example searches for all the repos which have the text 'TestAction: Read' in their readme files. Similarly, we can use many different qualifiers and combinations of them. To get the full list of qualifiers you can check the github document here.

+

Some examples of the qualifiers:

+
'jquery' in:name
+'jquery' in:name user:name
+sort:updated-asc 'TestAction: Read' in:readme
+
+

If we want to read the list of repositories of user 'fdtester' which have 'jquery' in their name, then we write the search query as follows:

+ +
+
+
from pprint import pprint
+from frictionless import portals, Catalog
+
+control = portals.GithubControl(apikey=apikey, search="user:fdtester jquery in:name")
+catalog = Catalog(control=control)
+print(catalog.packages)
+
+ +
+
[{'name': 'test-repo-jquery',
+ 'resources': [{'name': 'country-1',
+                'type': 'table',
+                'path': 'https://raw.githubusercontent.com/fdtester/test-repo-jquery/main/country-1.csv',
+                'scheme': 'https',
+                'format': 'csv',
+                'mediatype': 'text/csv'}]}]
+
+

There is only one repository with 'jquery' in its name in this user's account, so only one repository is returned.

+

We can also read repositories in a defined order using the 'sort' param or qualifier. Here we read the repos with the text 'TestAction: Read' in their readme file, most recently updated first, for example:

+ +
+
+
from pprint import pprint
+from frictionless import portals, Catalog
+
+control = portals.GithubControl(apikey=apikey, search="user:fdtester sort:updated-desc 'TestAction: Read' in:readme")
+catalog = Catalog(control=control)
+for index,package in enumerate(catalog.packages):
+    print(f"package:{index}", "\n")
+    print(package)
+
+ +
+
package:0
+
+{'name': 'test-repo-without-datapackage',
+ 'resources': [{'name': 'capitals',
+                'type': 'table',
+                'path': 'https://raw.githubusercontent.com/fdtester/test-repo-without-datapackage/master/data/capitals.csv',
+                'scheme': 'https',
+                'format': 'csv',
+                'mediatype': 'text/csv'},
+               {'name': 'countries',
+                'type': 'table',
+                'path': 'https://raw.githubusercontent.com/fdtester/test-repo-without-datapackage/master/data/countries.csv',
+                'scheme': 'https',
+                'format': 'csv',
+                'mediatype': 'text/csv'},
+               {'name': 'student',
+                'type': 'table',
+                'path': 'https://raw.githubusercontent.com/fdtester/test-repo-without-datapackage/master/data/student.xlsx',
+                'scheme': 'https',
+                'format': 'xlsx',
+                'mediatype': 'application/vnd.ms-excel'}]}
+package:1
+
+{'name': 'test-repo-jquery',
+ 'resources': [{'name': 'country-1',
+                'type': 'table',
+                'path': 'https://raw.githubusercontent.com/fdtester/test-repo-jquery/main/country-1.csv',
+                'scheme': 'https',
+                'format': 'csv',
+                'mediatype': 'text/csv'}]}
+package:2
+
+{'resources': [{'name': 'capitals',
+                'type': 'table',
+                'path': 'data/capitals.csv',
+                'scheme': 'file',
+                'format': 'csv',
+                'encoding': 'utf-8',
+                'mediatype': 'text/csv',
+                'dialect': {'csv': {'skipInitialSpace': True}},
+                'schema': {'fields': [{'name': 'id', 'type': 'integer'},
+                                      {'name': 'cid', 'type': 'integer'},
+                                      {'name': 'name', 'type': 'string'}]}}]}
+package:3
+
+{'name': 'test-tabulator',
+ 'resources': [{'name': 'first-resource',
+                'path': 'table.xls',
+                'schema': {'fields': [{'name': 'id', 'type': 'number'},
+                                      {'name': 'name', 'type': 'string'}]}},
+               {'name': 'number-two',
+                'path': 'table-reverse.csv',
+                'schema': {'fields': [{'name': 'id', 'type': 'integer'},
+                                      {'name': 'name', 'type': 'string'}]}}]}
+
+

Publishing Data

+

To write data to the repository, we use the Package.publish function as follows:

+ +
+
+
from pprint import pprint
+from frictionless import portals, Package
+
+package = Package('1174/datapackage.json')
+control = portals.GithubControl(repo="test-new-repo-doc", name='FD', email=email, apikey=apikey)
+response = package.publish(control=control)
+print(response)
+
+ +
+
Repository(full_name="fdtester/test-new-repo-doc")
+
+

We need to provide name and email explicitly if the user doesn't have a name set in their github account, or if the email is private and hidden. Otherwise, this info is taken from the user account. In order to publish/write to a repository, the api token needs 'repository write' access.

+

If the package is successfully published, the response is a 'Repository' instance.
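+
+Since the response is a PyGithub Repository object, here is a small hedged example of inspecting it (full_name and html_url are standard PyGithub attributes):
+
+# 'response' is the PyGithub Repository returned by package.publish
+print(response.full_name)  # e.g. "fdtester/test-new-repo-doc"
+print(response.html_url)   # link to the published repository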

+

Configuration

+

We can control the behavior of all the above three functions using various params.

+

For example, to read only 'csv' files in a package, we use the following code:

+ +
+
+
from pprint import pprint
+from frictionless import portals, Package
+
+control = portals.GithubControl(user="fdtester", formats=["csv"], repo="test-repo-without-datapackage", apikey=apikey)
+package = Package("https://github.com/fdtester/test-repo-without-datapackage")
+print(package)
+
+ +
+
{'name': 'test-repo-without-datapackage',
+ 'resources': [{'name': 'capitals',
+                'type': 'table',
+                'path': 'https://raw.githubusercontent.com/fdtester/test-repo-without-datapackage/master/data/capitals.csv',
+                'scheme': 'https',
+                'format': 'csv',
+                'mediatype': 'text/csv'},
+               {'name': 'countries',
+                'type': 'table',
+                'path': 'https://raw.githubusercontent.com/fdtester/test-repo-without-datapackage/master/data/countries.csv',
+                'scheme': 'https',
+                'format': 'csv',
+                'mediatype': 'text/csv'}]}
+
+

In order to read the first page of the search results and create a catalog, we use the per_page and page params as follows:

+ +
+
+
from pprint import pprint
+from frictionless import portals, Catalog
+
+control = portals.GithubControl(apikey=apikey, search="user:fdtester sort:updated-desc 'TestAction: Read' in:readme", per_page=1, page=1)
+catalog = Catalog(control=control)
+print(catalog.packages)
+
+ +
+
[{'name': 'test-repo-jquery',
+ 'resources': [{'name': 'country-1',
+                'type': 'table',
+                'path': 'https://raw.githubusercontent.com/fdtester/test-repo-jquery/main/country-1.csv',
+                'scheme': 'https',
+                'format': 'csv',
+                'mediatype': 'text/csv'}]}]
+
+

Similarly, we can also control the write function using params as follows:

+
from pprint import pprint
+from frictionless import portals, Package
+
+package = Package('datapackage.json')
+control = portals.GithubControl(repo="test-repo", name='FD Test', email="test@gmail", apikey=apikey)
+response = package.publish(control=control)
+print(response)
+
+
Repository(full_name="fdtester/test-repo")
+
+

Reference

+
+ + +
+
+ +

portals.GithubControl (class)

+ +
+
+ + +
+

portals.GithubControl (class)

+

Github control representation

+

Signature

+

(*, title: Optional[str] = None, description: Optional[str] = None, apikey: Optional[str] = None, basepath: Optional[str] = None, email: Optional[str] = None, formats: Optional[List[str]] = [csv, tsv, xlsx, xls, jsonl, ndjson], name: Optional[str] = None, order: Optional[str] = None, page: Optional[int] = None, per_page: Optional[int] = 30, repo: Optional[str] = None, search: Optional[str] = None, sort: Optional[str] = None, user: Optional[str] = None, filename: Optional[str] = None) -> None

+

Parameters

+
    +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + apikey + (Optional[str])
  • +
  • + basepath + (Optional[str])
  • +
  • + email + (Optional[str])
  • +
  • + formats + (Optional[List[str]])
  • +
  • + name + (Optional[str])
  • +
  • + order + (Optional[str])
  • +
  • + page + (Optional[int])
  • +
  • + per_page + (Optional[int])
  • +
  • + repo + (Optional[str])
  • +
  • + search + (Optional[str])
  • +
  • + sort + (Optional[str])
  • +
  • + user + (Optional[str])
  • +
  • + filename + (Optional[str])
  • +
+
+ +
+

portals.githubControl.apikey (property)

+

The access token to authenticate to the github API. It is required + to write files to github repo. + For reading, it is optional however using apikey increases the api + access limit from 60 to 5000 requests per hour. To write, access + token has to have write repository access. +

+

Signature

+

Optional[str]

+
+
+

portals.githubControl.basepath (property)

+

Base path is the base folder that the package and resource files will be written to.

+

Signature

+

Optional[str]

+
+
+

portals.githubControl.email (property)

+

Email is used while publishing the data to the github repo. It should be set explicitly, + if the primary email for the github account is not set to public.

+

Signature

+

Optional[str]

+
+
+

portals.githubControl.formats (property)

+

Formats instructs the plugin to only read specified types of files. By default it is set to + 'csv, tsv, xlsx, xls, jsonl, ndjson'. +

+

Signature

+

Optional[List[str]]

+
+
+

portals.githubControl.name (property)

+

Name of the github user which is used while publishing the data. It should be provided explicitly, + if the name of the user is not set in the github account. +

+

Signature

+

Optional[str]

+
+
+

portals.githubControl.order (property)

+

The order in which to retrieve the data sorted by 'sort' param. It can be one of: 'asc','desc'. + This parameter is ignored if 'sort' is not provided. +

+

Signature

+

Optional[str]

+
+
+

portals.githubControl.page (property)

+

If specified, only the given page is returned.

+

Signature

+

Optional[int]

+
+
+

portals.githubControl.per_page (property)

+

The number of results per page. Default value is 30. Max value is 100.

+

Signature

+

Optional[int]

+
+
+

portals.githubControl.repo (property)

+

Name of the repo to read or write.

+

Signature

+

Optional[str]

+
+
+

portals.githubControl.search (property)

+

Search query containing one or more search keywords and qualifiers to filter the repositories. + For example, 'windows+label:bug+language:python'.

+

Signature

+

Optional[str]

+
+
+

portals.githubControl.sort (property)

+

Sorts the result of the query by number of stars, forks, help-wanted-issues or updated. + By default the results are sorted by best match in desc order.

+

Signature

+

Optional[str]

+
+
+

portals.githubControl.user (property)

+

Username of the github account.

+

Signature

+

Optional[str]

+
+
+

portals.githubControl.filename (property)

+

Custom data package file name while publishing the data. By default it will use 'datapackage.json'.

+

Signature

+

Optional[str]

+
+ + + + +
+
+
+ + +
+
+ +
+ +
+ +
+
+
+
+
+ + + + + + + + + + + + +
+ +
+ + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+ + + +
+
+ + + + + + + + + + + \ No newline at end of file diff --git a/docs/portals/zenodo.html b/docs/portals/zenodo.html new file mode 100644 index 0000000000..007bda6a07 --- /dev/null +++ b/docs/portals/zenodo.html @@ -0,0 +1,4201 @@ + + + + + + + + +Zenodo Portal | Frictionless Framework + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+
+
+ +

Zenodo Portal

+

The Zenodo portal makes data sharing between the frictionless framework and Zenodo easy. Data can be read from a Zenodo repo as well as written to Zenodo seamlessly. The plugin uses the 'zenodopy' library under the hood to communicate with the Zenodo REST API.

+

Installation

+

We need to install zenodo extra dependencies to use this feature:

+ +
+
+
pip install frictionless[zenodo] --pre
+pip install 'frictionless[zenodo]' --pre # for zsh shell
+
+ +
+

Reading Package

+

You can read data from a zenodo repository as follows:

+ +
+
+
from pprint import pprint
+from frictionless import portals, Package
+
+package = Package("https://zenodo.org/record/7078768")
+package.infer()
+print(package)
+
+ +
+
{'title': 'Frictionless Data Test Dataset Without Descriptor',
+ 'resources': [{'name': 'capitals',
+                'type': 'table',
+                'path': 'capitals.csv',
+                'scheme': 'https',
+                'format': 'csv',
+                'encoding': 'utf-8',
+                'mediatype': 'text/csv',
+                'dialect': {'csv': {'skipInitialSpace': True}},
+                'schema': {'fields': [{'name': 'id', 'type': 'integer'},
+                                      {'name': 'cid', 'type': 'integer'},
+                                      {'name': 'name', 'type': 'string'}]}},
+               {'name': 'table',
+                'type': 'table',
+                'path': 'table.xls',
+                'scheme': 'https',
+                'format': 'xls',
+                'encoding': 'utf-8',
+                'mediatype': 'application/vnd.ms-excel',
+                'schema': {'fields': [{'name': 'id', 'type': 'integer'},
+                                      {'name': 'name', 'type': 'string'}]}}]}
+
+

To increase the access limit, pass 'apikey' as the param to the reader function as follows:

+ +
+
+
from pprint import pprint
+from frictionless import portals, Package
+
+control = portals.ZenodoControl(apikey=apikey)
+package = Package("https://zenodo.org/record/7078768", control=control)
+print(package)
+
+ +
+

The reader function can read a package from repos with or without a data package descriptor. If the repo does not have a descriptor, one is created with the same name as the repo, as shown in the example above. By default, the function reads the file types supported by the frictionless framework, such as csv, xlsx and xls, but we can also set the file types using control parameters.

+

If the repo has a descriptor it simply returns the descriptor as shown below:

+ +
+
+
from pprint import pprint
+from frictionless import portals, Package
+
+package = Package("https://zenodo.org/record/7078760")
+package.infer()
+print(package)
+
+ +
+
{'name': 'testing',
+ 'title': 'Frictionless Data Test Dataset',
+ 'resources': [{'name': 'data',
+                'path': 'data.csv',
+                'schema': {'fields': [{'name': 'id',
+                                       'type': 'string',
+                                       'constraints': {'required': True}},
+                                      {'name': 'name', 'type': 'string'},
+                                      {'name': 'description', 'type': 'string'},
+                                      {'name': 'amount', 'type': 'number'}],
+                           'primaryKey': ['id']}},
+               {'name': 'data2',
+                'path': 'data2.csv',
+                'schema': {'fields': [{'name': 'parent', 'type': 'string'},
+                                      {'name': 'comment', 'type': 'string'}],
+                           'foreignKeys': [{'fields': ['parent'],
+                                            'reference': {'resource': 'data',
+                                                          'fields': ['id']}}]}}]}
+
+

Once you read the package from the repo, you can then easily access the resources and their data, for example:

+ +
+
+
from pprint import pprint
+from frictionless import portals, Package
+
+package = Package("https://zenodo.org/record/7078760")
+pprint(package.get_resource('data').read_rows())
+
+ +
+
[{'amount': Decimal('10000.5'),
+  'description': 'Taxes we collect',
+  'id': 'A3001',
+  'name': 'Taxes'},
+ {'amount': Decimal('2000.5'),
+  'description': 'Parking fees we collect',
+  'id': 'A5032',
+  'name': 'Parking Fees'}]
+
+

You can apply any function available in the frictionless framework. Here is an example of validating the package that was read:

+ +
+
+
from pprint import pprint
+from frictionless import portals, Package
+
+package = Package("https://zenodo.org/record/7078760")
+report = package.validate()
+pprint(report)
+
+ +
+
{'valid': True,
+ 'stats': {'tasks': 1, 'warnings': 0, 'errors': 0, 'seconds': 0.655},
+ 'warnings': [],
+ 'errors': [],
+ 'tasks': [{'valid': True,
+            'name': 'first-http-resource',
+            'type': 'table',
+            'place': 'https://raw.githubusercontent.com/fdtester/test-repo-with-datapackage-yaml/master/data/capitals.csv',
+            'labels': ['id', 'cid', 'name'],
+            'stats': {'md5': '154d822b8c2aa259867067f01c0efee5',
+                      'sha256': '5ec3d8a4d137891f2f19ab9d244cbc2c30a7493f895c6b8af2506d9b229ed6a8',
+                      'bytes': 76,
+                      'fields': 3,
+                      'rows': 5,
+                      'warnings': 0,
+                      'errors': 0,
+                      'seconds': 0.651},
+            'warnings': [],
+            'errors': []}]}
+
+
+

Reading Catalog

+

Catalog is a container for packages. We can read single or multiple repositories from Zenodo and create a catalog.

+ +
+
+
from pprint import pprint
+from frictionless import portals, Catalog
+
+control = portals.ZenodoControl(search='notes:"TDWD"')
+catalog = Catalog(control=control)
+catalog.infer()
+print("Total packages", len(catalog.packages))
+print(catalog.packages)
+
+ +
+
Total packages 2
+[{'title': 'Frictionless Data Test Dataset Without Descriptor',
+ 'resources': [{'name': 'countries',
+                'type': 'table',
+                'path': 'countries.csv',
+                'scheme': 'https',
+                'format': 'csv',
+                'encoding': 'utf-8',
+                'mediatype': 'text/csv',
+                'dialect': {'headerRows': [2]},
+                'schema': {'fields': [{'name': 'id', 'type': 'integer'},
+                                      {'name': 'neighbor_id', 'type': 'string'},
+                                      {'name': 'name', 'type': 'string'},
+                                      {'name': 'population',
+                                       'type': 'string'}]}}]}, {'title': 'Frictionless Data Test Dataset Without Descriptor',
+ 'resources': [{'name': 'capitals',
+                'type': 'table',
+                'path': 'capitals.csv',
+                'scheme': 'https',
+                'format': 'csv',
+                'encoding': 'utf-8',
+                'mediatype': 'text/csv',
+                'dialect': {'csv': {'skipInitialSpace': True}},
+                'schema': {'fields': [{'name': 'id', 'type': 'integer'},
+                                      {'name': 'cid', 'type': 'integer'},
+                                      {'name': 'name', 'type': 'string'}]}},
+               {'name': 'table',
+                'type': 'table',
+                'path': 'table.xls',
+                'scheme': 'https',
+                'format': 'xls',
+                'encoding': 'utf-8',
+                'mediatype': 'application/vnd.ms-excel',
+                'schema': {'fields': [{'name': 'id', 'type': 'integer'},
+                                      {'name': 'name', 'type': 'string'}]}}]}]
+
+

In the above example we use search text to filter the repositories and reduce the result to a small number. However, the search field is not mandatory. We can simply use 'control' parameters and create the catalog from a single repo, for example:

+ +
+
+
from pprint import pprint
+from frictionless import portals, Catalog
+
+control = portals.ZenodoControl(record="7078768")
+catalog = Catalog(control=control)
+catalog.infer()
+print("Total packages", len(catalog.packages))
+print(catalog.packages)
+
+ +
+
Total packages 1
+[{'title': 'Frictionless Data Test Dataset Without Descriptor',
+ 'resources': [{'name': 'capitals',
+                'type': 'table',
+                'path': 'capitals.csv',
+                'scheme': 'https',
+                'format': 'csv',
+                'encoding': 'utf-8',
+                'mediatype': 'text/csv',
+                'dialect': {'csv': {'skipInitialSpace': True}},
+                'schema': {'fields': [{'name': 'id', 'type': 'integer'},
+                                      {'name': 'cid', 'type': 'integer'},
+                                      {'name': 'name', 'type': 'string'}]}},
+               {'name': 'table',
+                'type': 'table',
+                'path': 'table.xls',
+                'scheme': 'https',
+                'format': 'xls',
+                'encoding': 'utf-8',
+                'mediatype': 'application/vnd.ms-excel',
+                'schema': {'fields': [{'name': 'id', 'type': 'integer'},
+                                      {'name': 'name', 'type': 'string'}]}}]}]
+
+

As shown in the first catalog example above, we can use different search queries to filter the records. That example returns all the records whose notes field contains "TDWD". Similarly, we can use many different queries combining terms, phrases or field searches. To get the full list of supported queries you can check the official zenodo document here.

+

Some examples of the search queries are:

+
"open science"
+title:"open science"
++description:"frictionless" +title:"Bionomia"
++publication_date:[2022-10-01 TO 2022-11-01] +title:"frictionless"
+
+

We can search for terms such as "open science" and use '+' to mark a term as mandatory. If '+' is not specified, the term is optional and 'OR' logic is applied to the search. We can also use field search. All the search queries supported by the Zenodo REST API can be used.

+

If we want to read the list of repositories matching the query "+frictionlessdata +science", then we write the search query as follows:

+ +
+
+
from pprint import pprint
+from frictionless import portals, Catalog
+
+control = portals.ZenodoControl(search='+frictionlessdata +science')
+catalog = Catalog(control=control)
+print("Total Packages", len(catalog.packages))
+
+ +
+
Total Packages 1
+
+

There is only one repository matching '+frictionlessdata +science', so only one repository is returned.

+

We can also read repositories in a defined order using the 'sort' param. Here we read the repos matching 'creators.name:"FD Tester"', most recently updated first, for example:

+ +
+
+
from pprint import pprint
+from frictionless import portals, Catalog
+
+catalog = Catalog(
+       control=portals.ZenodoControl(
+           search='creators.name:"FD Tester"',
+           sort="mostrecent",
+           page=1,
+           size=1,
+       ),
+   )
+catalog.infer()
+print(catalog.packages)
+
+ +
+
[{'name': 'test-repo-resources-with-http-data-csv',
+ 'title': 'Test Write File - Remote',
+ 'resources': [{'name': 'first-http-resource',
+                'path': 'https://raw.githubusercontent.com/fdtester/test-repo-with-datapackage-yaml/master/data/capitals.csv',
+                'schema': {'fields': [{'name': 'id', 'type': 'integer'},
+                                      {'name': 'cid', 'type': 'string'},
+                                      {'name': 'name', 'type': 'string'}]}}]}]
+
+

Publishing Data

+

To write data to the repository, we use the Package.publish function as follows:

+ +
+
+
from pprint import pprint
+from frictionless import portals, Package
+
+control = portals.ZenodoControl(
+        metafn="data/zenodo/meta.json",
+        apikey=apikey
+    )
+package = Package("484/package-to-write/datapackage.json")
+deposition_id = package.publish(control=control)
+print(deposition_id)
+
+ +
+
1123500
+
+

To publish the data, we need to provide metadata for the Zenodo record, which we send using "meta.json". In order to publish/write to a repository, the API token needs 'deposit:write' access. If the package is successfully published, the deposition_id is returned, as shown in the example above.
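+
+As a hedged sketch, a minimal meta.json could be generated as follows (the field names follow the Zenodo deposition metadata format from the Zenodo REST API documentation rather than this plugin's own schema; the title and creators are placeholders, so adjust them for your record):
+
+import json
+
+# illustrative deposition metadata; values below are placeholders
+meta = {
+    "metadata": {
+        "title": "Test Write File - Remote",
+        "upload_type": "dataset",
+        "description": "Dataset published with the frictionless framework.",
+        "creators": [{"name": "FD Tester"}],
+    }
+}
+with open("data/zenodo/meta.json", "w") as f:
+    json.dump(meta, f, indent=2)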

+

For testing, we can pass the sandbox url using the base_url param:

+ +
+
+
from pprint import pprint
+from frictionless import portals, Package
+
+control = portals.ZenodoControl(
+        metafn="data/zenodo/meta.json",
+        apikey=apikey_sandbox,
+        base_url="https://sandbox.zenodo.org/api/"
+    )
+package = Package("484/package-to-write/datapackage.json")
+deposition_id = package.publish(control=control)
+
+ +
+

If the metadata file is not provided, the API reads the available data from the package file. Metadata is generated using the title, contributors and description from the Package descriptor.

+ +
+
+
from pprint import pprint
+from frictionless import portals, Package
+
+control = portals.ZenodoControl(
+        apikey=apikey_sandbox,
+        base_url="https://sandbox.zenodo.org/api/"
+    )
+package = Package("484/package-to-write/datapackage.json")
+deposition_id = package.publish(control=control)
+
+ +
+

Configuration

+

We can control the behavior of all the above three functions using various params.

+

For example, to read only 'csv' files in a package, we use the following code:

+ +
+
+
from pprint import pprint
+from frictionless import portals, Package
+
+control = portals.ZenodoControl(formats=["csv"], record="7078725", apikey=apikey)
+package = Package(control=control)
+print(package)
+
+ +
+
{'name': 'test-repo-without-datapackage',
+ 'resources': [{'name': 'capitals',
+                'type': 'table',
+                'path': 'https://raw.githubusercontent.com/fdtester/test-repo-without-datapackage/master/data/capitals.csv',
+                'scheme': 'https',
+                'format': 'csv',
+                'mediatype': 'text/csv'},
+               {'name': 'countries',
+                'type': 'table',
+                'path': 'https://raw.githubusercontent.com/fdtester/test-repo-without-datapackage/master/data/countries.csv',
+                'scheme': 'https',
+                'format': 'csv',
+                'mediatype': 'text/csv'}]}
+
+

In order to read the first page of the search results and create a catalog, we use the page and size params as follows:

+ +
+
+
from pprint import pprint
+from frictionless import portals, Catalog
+
+catalog = Catalog(
+       control=portals.ZenodoControl(
+           search='creators.name:"FD Tester"',
+           sort="mostrecent",
+           page=1,
+           size=1,
+       ),
+   )
+print(catalog.packages)
+
+ +
+
[{'name': 'test-repo-resources-with-http-data-csv',
+ 'title': 'Test Write File - Remote',
+ 'resources': [{'name': 'first-http-resource',
+                'path': 'https://raw.githubusercontent.com/fdtester/test-repo-with-datapackage-yaml/master/data/capitals.csv',
+                'schema': {'fields': [{'name': 'id', 'type': 'integer'},
+                                      {'name': 'cid', 'type': 'string'},
+                                      {'name': 'name', 'type': 'string'}]}}]}]
+
+

Reference

+
+ + +
+
+ +

portals.ZenodoControl (class)

+ +
+
+ + +
+

portals.ZenodoControl (class)

+

Zenodo control representation

+

Signature

+

(*, title: Optional[str] = None, description: Optional[str] = None, all_versions: Optional[int] = None, apikey: Optional[str] = None, base_url: Optional[str] = https://zenodo.org/api/, bounds: Optional[str] = None, communities: Optional[str] = None, deposition_id: Optional[int] = None, doi: Optional[str] = None, formats: Optional[List[str]] = [csv, tsv, xlsx, xls, jsonl, ndjson, csv.zip, tsv.zip, xlsx.zip, xls.zip, jsonl.zip, ndjson.zip], name: Optional[str] = None, metafn: Optional[str] = None, page: Optional[str] = None, rcustom: Optional[str] = None, record: Optional[str] = None, rtype: Optional[str] = None, search: Optional[str] = None, size: Optional[int] = None, sort: Optional[str] = None, status: Optional[str] = None, subtype: Optional[str] = None, tmp_path: Optional[str] = None) -> None

+

Parameters

+
    +
  • + title + (Optional[str])
  • +
  • + description + (Optional[str])
  • +
  • + all_versions + (Optional[int])
  • +
  • + apikey + (Optional[str])
  • +
  • + base_url + (Optional[str])
  • +
  • + bounds + (Optional[str])
  • +
  • + communities + (Optional[str])
  • +
  • + deposition_id + (Optional[int])
  • +
  • + doi + (Optional[str])
  • +
  • + formats + (Optional[List[str]])
  • +
  • + name + (Optional[str])
  • +
  • + metafn + (Optional[str])
  • +
  • + page + (Optional[str])
  • +
  • + rcustom + (Optional[str])
  • +
  • + record + (Optional[str])
  • +
  • + rtype + (Optional[str])
  • +
  • + search + (Optional[str])
  • +
  • + size + (Optional[int])
  • +
  • + sort + (Optional[str])
  • +
  • + status + (Optional[str])
  • +
  • + subtype + (Optional[str])
  • +
  • + tmp_path + (Optional[str])
  • +
+
+ +
+

portals.zenodoControl.all_versions (property)

+

Show (true or 1) or hide (false or 0) all versions of records.

+

Signature

+

Optional[int]

+
+
+

portals.zenodoControl.apikey (property)

+

The access token for authenticating with the Zenodo API. It is required to write files to a Zenodo deposit resource. For reading it is optional; however, using an apikey increases the API rate limit from 60 to 100 requests per hour. To write, the access token has to have deposit:write scope.

+

Signature

+

Optional[str]

+
+
+

portals.zenodoControl.base_url (property)

+

Endpoint for the Zenodo API. By default it is set to the live site (https://zenodo.org/api). For testing uploads, we can use the sandbox, for example https://sandbox.zenodo.org/api. The sandbox does not work for reading.

+

Signature

+

Optional[str]

+
+
+

portals.zenodoControl.bounds (property)

+

Return records filtered by a geolocation bounding box, in the format bounds=143.37158,-38.99357,146.90918,-37.35269.

+

Signature

+

Optional[str]

+
+
+

portals.zenodoControl.communities (property)

+

Return records that are part of the specified communities (identified by community identifier).

+

Signature

+

Optional[str]

+
+
+

portals.zenodoControl.deposition_id (property)

+

ID of the deposition resource. The deposition resource is used for uploading files to and editing files on Zenodo.

+

Signature

+

Optional[int]

+
+
+

portals.zenodoControl.doi (property)

+

Digital Object Identifier (DOI). When a deposition is published, a unique DOI is registered by Zenodo, or the user can set it manually. It applies only to published depositions. If set, the record matching this DOI is returned.

+

Signature

+

Optional[str]

+
+
+

portals.zenodoControl.formats (property)

+

Instructs the plugin to read only the specified file types. By default it is set to "csv", "tsv", "xlsx", "xls", "jsonl" and "ndjson", plus their zipped variants.

+

Signature

+

Optional[List[str]]

+
+
+

portals.zenodoControl.name (property)

+

Custom name for a catalog or a package. The default name is 'catalog' or 'package'.

+

Signature

+

Optional[str]

+
+
+

portals.zenodoControl.metafn (property)

+

Metadata file path for deposition resource. Deposition resource is used for uploading + and editing records on Zenodo.

+

Signature

+

Optional[str]

+
+
+

portals.zenodoControl.page (property)

+

Page number to retrieve from the search result.

+

Signature

+

Optional[str]

+
+
+

portals.zenodoControl.rcustom (property)

+

Return records containing the specified custom keywords, in the format custom=[field_name]:field_value.

+

Signature

+

Optional[str]

+
+
+

portals.zenodoControl.record (property)

+

Unique identifier of a record. We can use it to find a specific record while creating a package or a catalog, for example 7078768.

+

Signature

+

Optional[str]

+
+
+

portals.zenodoControl.rtype (property)

+

Return records of the specified type. (Publication, Poster, Presentation…)

+

Signature

+

Optional[str]

+
+
+

portals.zenodoControl.search (property)

+

Search query containing one or more search keywords to filter the records, for example notes:"TDBASIC".

+

Signature

+

Optional[str]

+
+
+

portals.zenodoControl.size (property)

+

Number of results to return per page.

+

Signature

+

Optional[int]

+
+
+

portals.zenodoControl.sort (property)

+

Sort order (bestmatch or mostrecent). Prefix with a minus to change from ascending to descending (e.g. -mostrecent).

+

Signature

+

Optional[str]

+
+
+

portals.zenodoControl.status (property)

+

Filter result based on the deposit status (either draft or published)

+

Signature

+

Optional[str]

+
+
+

portals.zenodoControl.subtype (property)

+

Return records of the specified subtype.

+

Signature

+

Optional[str]

+
+
+

portals.zenodoControl.tmp_path (property)

+

Temporary path used to create intermediate package/resource files before uploading them to the Zenodo instance.

+

Signature

+

Optional[str]

+

File Resource

+

A file resource is the most basic one. Actually, every data file can be treated as a file resource. For example:

from frictionless.resources import FileResource
+
+resource = FileResource(path='text.txt')
+resource.infer(stats=True)
+print(resource)
+
+ +
{'name': 'text',
+ 'type': 'file',
+ 'path': 'text.txt',
+ 'scheme': 'file',
+ 'format': 'txt',
+ 'mediatype': 'text/txt',
+ 'encoding': 'utf-8',
+ 'hash': 'sha256:b9e68e1bea3e5b19ca6b2f98b73a54b73daafaa250484902e09982e07a12e733',
+ 'bytes': 5}
+ +
+
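We can also read the contents, mirroring the other resource pages. A minimal sketch (assuming the same text.txt fixture as above; read_bytes returns the raw bytes of the file):

from frictionless.resources import FileResource

resource = FileResource(path='text.txt')
resource.infer(stats=True)
# a file resource exposes its raw contents as bytes
print(resource.read_bytes())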

Json Resource

+

A json resource contains structured data such as JSON or YAML (validation with JSON Schema is under development):

+ +
+
+
from frictionless.resources import JsonResource
+
+resource = JsonResource(path='data.json')
+resource.infer(stats=True)
+print(resource)
+
+ +
{'name': 'data',
+ 'type': 'json',
+ 'path': 'data.json',
+ 'scheme': 'file',
+ 'format': 'json',
+ 'mediatype': 'text/json',
+ 'encoding': 'utf-8',
+ 'hash': 'sha256:80af3283a5c57e5d3a8d1d4099bebe639c610c4ecc8ce39fe53f9f9d9c441c4a',
+ 'bytes': 21}
+ +
+

We can read the contents:

from frictionless.resources import JsonResource
+
+resource = JsonResource(path='data.json')
+resource.infer(stats=True)
+print(resource.read_data())
+
+ +
{'key': 'value'}
+ +
+

Table Resource

+

A table resource contains a tabular data file (can be validated with Table Schema):

+ +
+
+
from frictionless.resources import TableResource
+
+resource = TableResource(path='table.csv')
+resource.infer(stats=True)
+print(resource)
+
+ +
{'name': 'table',
+ 'type': 'table',
+ 'path': 'table.csv',
+ 'scheme': 'file',
+ 'format': 'csv',
+ 'mediatype': 'text/csv',
+ 'encoding': 'utf-8',
+ 'hash': 'sha256:a1fd6c5ff3494f697874deeb07f69f8667e903dd94a7bc062dd57550cea26da8',
+ 'bytes': 30,
+ 'fields': 2,
+ 'rows': 2,
+ 'schema': {'fields': [{'name': 'id', 'type': 'integer'},
+                       {'name': 'name', 'type': 'string'}]}}
+ +
+

We can read the contents:

from frictionless.resources import TableResource
+
+resource = TableResource(path='table.csv')
+resource.infer(stats=True)
+print(resource.read_rows())
+
+ +
[{'id': 1, 'name': 'english'}, {'id': 2, 'name': '中国人'}]
+ +

Text Resource

+

A text resource represents a textual file, such as a Markdown document. For example:

+ +
+
+
from frictionless.resources import TextResource
+
+resource = TextResource(path='article.md')
+resource.infer(stats=True)
+print(resource)
+
+ +
{'name': 'article',
+ 'type': 'text',
+ 'path': 'article.md',
+ 'scheme': 'file',
+ 'format': 'md',
+ 'mediatype': 'text/markdown',
+ 'encoding': 'utf-8',
+ 'hash': 'sha256:c3d88243a8bbb2d95787af6edd6b0017791a090d18c80765f92b486ab502cebb',
+ 'bytes': 20}
+ +
+

We can read the contents:

from frictionless.resources import TextResource
+
+resource = TextResource(path='article.md')
+resource.infer(stats=True)
+print(resource.read_text())
+
+ +
# Article
+
+Contents
+ +
+

AWS Schemes

+

Frictionless supports reading data from an AWS cloud source. You can read files in any format available in your S3 bucket.

+ +
+
+
pip install frictionless[aws]
+pip install 'frictionless[aws]' # for zsh shell
+
+ +
+

Reading Data

+

You can read from this source using Package/Resource, for example:

+ +
+
+
from pprint import pprint
+from frictionless import Resource
+
+resource = Resource(path='s3://bucket/table.csv')
+pprint(resource.read_rows())
+
+ +
+

For reading from a private bucket you need to set up AWS credentials as described in the Boto3 documentation.

+

Writing Data

+

A similar approach can be used for writing:

+ +
+
+
from frictionless import Resource
+
+resource = Resource(path='data/table.csv')
+resource.write('s3://bucket/table.csv')
+
+ +
+

Configuration

+

There is a Control to configure how Frictionless reads files in this storage. For example:

+ +
+
+
from frictionless import Resource
from frictionless.plugins.s3 import S3Control

resource = Resource(data=[['id', 'name'], [1, 'english'], [2, 'german']])
# use the imported S3Control to point at a custom endpoint
resource.write('table.new.csv', control=S3Control(endpoint_url='<url>'))
+
+ +
+

Reference

schemes.AwsControl (class)

Aws control representation

Signature

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, s3_endpoint_url: str = https://s3.amazonaws.com) -> None

Parameters

  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • s3_endpoint_url (str)
+
+ +
+

schemes.awsControl.s3_endpoint_url (property)

+

Endpoint URL of the S3 (or S3-compatible) service.

Signature

+

str

+

Buffer Scheme

+

Frictionless supports working with bytes loaded into memory.

+

Reading Data

+

You can read Buffer Data using Package/Resource API, for example:

+ +
+
+
from pprint import pprint
+from frictionless import Resource
+
+resource = Resource(b'id,name\n1,english\n2,german', format='csv')
+pprint(resource.read_rows())
+
+ +
[{'id': 1, 'name': 'english'}, {'id': 2, 'name': 'german'}]
+ +
+

Writing Data

+

A similar approach can be used for writing:

from frictionless import Resource
+
+source = Resource(data=[['id', 'name'], [1, 'english'], [2, 'german']])
+target = source.write(scheme='buffer', format='csv')
+print(target)
+print(target.read_rows())
+
+ +
{'name': 'memory',
+ 'type': 'table',
+ 'data': [],
+ 'scheme': 'buffer',
+ 'format': 'csv',
+ 'mediatype': 'text/csv'}
+[{'id': 1, 'name': 'english'}, {'id': 2, 'name': 'german'}]
+ +
+

Local Scheme

+

You can read and write files locally with Frictionless. This is a basic functionality of Frictionless.

+

Reading Data

+

You can read using Package/Resource, for example:

+ +
+
+
from pprint import pprint
+from frictionless import Resource
+
+resource = Resource(path='table.csv')
+pprint(resource.read_rows())
+
+ +
[{'id': 1, 'name': 'english'}, {'id': 2, 'name': '中国人'}]
+ +
+

Writing Data

+

A similar approach can be used for writing:

from frictionless import Resource
+
+source = Resource(data=[['id', 'name'], [1, 'english'], [2, 'german']])
+target = source.write('table-output.csv')
+print(target)
+print(target.to_view())
+
+ +
{'name': 'table-output',
+ 'type': 'table',
+ 'path': 'table-output.csv',
+ 'scheme': 'file',
+ 'format': 'csv',
+ 'mediatype': 'text/csv'}
++----+-----------+
+| id | name      |
++====+===========+
+|  1 | 'english' |
++----+-----------+
+|  2 | 'german'  |
++----+-----------+
+ +

Multipart Scheme

+

You can read and write files split into chunks with Frictionless.

+

Reading Data

+

You can read using Package/Resource, for example:

+ +
+
+
from pprint import pprint
+from frictionless import Resource
+
+resource = Resource(path='chunk1.csv', extrapaths=['chunk2.csv'])
+pprint(resource.read_rows())
+
+ +
[{'id': 1, 'name': 'english'}, {'id': 2, 'name': '中国人'}]
+ +
+

Writing Data

+

A similar approach can be used for writing:

+ +
+
+
from frictionless import Resource
+
+resource = Resource(path='table.json')
+resource.write('table{number}.json', scheme="multipart", control={"chunkSize": 1000000})
+
+ +
+

Configuration

+

There is a Control to configure how Frictionless reads files using this scheme. For example:

+ +
+
+
from frictionless import Resource
+from frictionless.plugins.multipart import MultipartControl
+
+control = MultipartControl(chunk_size=1000000)
+resource = Resource(data=[['id', 'name'], [1, 'english'], [2, 'german']])
+resource.write('table{number}.json', scheme="multipart", control=control)
+
+ +
+

Reference

schemes.MultipartControl (class)

Multipart control representation

Signature

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, chunk_size: int = 100000000) -> None

Parameters

  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • chunk_size (int)
+
+ +
+

schemes.multipartControl.chunk_size (property)

+

+ Specifies chunk size for the multipart file. +

+

Signature

+

int

+

Remote Scheme

+

You can read files remotely with Frictionless. This is a basic functionality of Frictionless.

+

Reading Data

+

You can read using Package/Resource, for example:

+ +
+
+
from pprint import pprint
+from frictionless import Resource
+
+path='https://raw.githubusercontent.com/frictionlessdata/frictionless-py/master/data/table.csv'
+resource = Resource(path=path)
+pprint(resource.read_rows())
+
+ +
+
[{'id': 1, 'name': 'english'}, {'id': 2, 'name': '中国人'}]
+
+

Writing Data

+

A similar approach can be used for writing:

+ +
+
+
from frictionless import Resource
+
+resource = Resource(path='data/table.csv')
+resource.write('https://example.com/data/table.csv') # will POST the file to the server
+
+ +
+

Configuration

+

There is a Control to configure remote data, for example:

+ +
+
+
from frictionless import Resource
+from frictionless.plugins.remote import RemoteControl
+
+control = RemoteControl(http_timeout=10)
+path='https://raw.githubusercontent.com/frictionlessdata/frictionless-py/master/data/table.csv'
+resource = Resource(path=path, control=control)
+print(resource.to_view())
+
+ +
+
+----+-----------+
+| id | name      |
++====+===========+
+|  1 | 'english' |
++----+-----------+
+|  2 | '中国人'     |
++----+-----------+
+
+

Reference

schemes.RemoteControl (class)

Remote control representation

Signature

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, http_timeout: int = 10, http_preload: bool = False) -> None

Parameters

  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • http_timeout (int)
  • http_preload (bool)
+
+ +
+

schemes.remoteControl.http_timeout (property)

+

+ Specifies the time to wait, if the remote server + does not respond before raising an error. The default + value is 10. +

+

Signature

+

int

+
+
+

schemes.remoteControl.http_preload (property)

+

+ Preloads data to the memory if set to True. It is set + to False by default. +

+

Signature

+

bool

+

Stream Scheme

+

Frictionless supports using data stored as file-like objects in Python.

+

Reading Data

+
+

It's recommended to open files in byte-mode. If the file is opened in text-mode, Frictionless will try to re-open it in byte-mode.

+
+

You can read Stream using Package/Resource, for example:

+ +
+
+
from pprint import pprint
+from frictionless import Resource
+
+with open('table.csv', 'rb') as file:
+  resource = Resource(file, format='csv')
+  pprint(resource.read_rows())
+
+ +
[{'id': 1, 'name': 'english'}, {'id': 2, 'name': '中国人'}]
+ +
+
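Any binary file-like object can serve as a source, not only files opened from disk. A minimal sketch using an in-memory io.BytesIO stream (an assumption based on the note above that the stream scheme accepts binary file-like objects):

import io
from frictionless import Resource

# an in-memory binary stream acts like an opened file
stream = io.BytesIO(b'id,name\n1,english\n2,german')
resource = Resource(stream, format='csv')
print(resource.read_rows())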

Writing Data

+

A similar approach can be used for writing:

from frictionless import Resource
+
+source = Resource(data=[['id', 'name'], [1, 'english'], [2, 'german']])
+target = source.write(scheme='stream', format='csv')
+print(target)
+print(target.to_view())
+
+ +
{'name': 'memory',
+ 'type': 'table',
+ 'data': [],
+ 'scheme': 'stream',
+ 'format': 'csv',
+ 'mediatype': 'text/csv'}
++----+-----------+
+| id | name      |
++====+===========+
+|  1 | 'english' |
++----+-----------+
+|  2 | 'german'  |
++----+-----------+
+ +

Cell Steps

+

The Cell steps are responsible for cell operations like converting, replacing, or formatting, along with others.

+

Convert Cells

+

Converts cell values of one or more fields using arbitrary functions, method +invocations or dictionary translations.

+

Using Value

+

We can provide a value to be set in all cells of the field. Take into account that the value type needs to conform to the field type; otherwise it will lead to a validation error:

+ +
+
+
from frictionless import Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.cell_convert(field_name='population', value="100"),
+    ],
+)
+print(target.to_view())
+
+ +
+----+-----------+------------+
+| id | name      | population |
++====+===========+============+
+|  1 | 'germany' |        100 |
++----+-----------+------------+
+|  2 | 'france'  |        100 |
++----+-----------+------------+
+|  3 | 'spain'   |        100 |
++----+-----------+------------+
+ +
+

Using Mapping

+

Another option to modify the field's cells is to provide a mapping. It's a translation table that uses literal matching to replace values. It's usually used for string fields:

+ +
+
+
from frictionless import Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.cell_convert(field_name='name', mapping = {'germany': 'GERMANY'}),
+    ],
+)
+print(target.to_view())
+
+ +
+----+-----------+------------+
+| id | name      | population |
++====+===========+============+
+|  1 | 'GERMANY' |         83 |
++----+-----------+------------+
+|  2 | 'france'  |         66 |
++----+-----------+------------+
+|  3 | 'spain'   |         47 |
++----+-----------+------------+
+ +
+

Using Function

+
+ +

We can provide an arbitrary function to update the field cells. If you want to modify a non-string field, it's really important to normalize the table first; otherwise the function will be applied to a non-parsed value:

+ +
+
+
from frictionless import Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.table_normalize(),
+        steps.cell_convert(field_name='population', function=lambda v: v*2),
+    ],
+)
+print(target.to_view())
+
+ +
+----+-----------+------------+
+| id | name      | population |
++====+===========+============+
+|  1 | 'germany' |        166 |
++----+-----------+------------+
+|  2 | 'france'  |        132 |
++----+-----------+------------+
+|  3 | 'spain'   |         94 |
++----+-----------+------------+
+ +
+

Reference

steps.cell_convert (class)

Convert cell. Converts cell values of one or more fields using arbitrary functions, method invocations or dictionary translations.

Signature

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, value: Optional[Any] = None, mapping: Optional[Dict[str, Any]] = None, function: Optional[Any] = None, field_name: Optional[str] = None) -> None

Parameters

  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • value (Optional[Any])
  • mapping (Optional[Dict[str, Any]])
  • function (Optional[Any])
  • field_name (Optional[str])
  • +
+
+ +
+

steps.cell_convert.value (property)

+

Value to set in the field's cells

+
Signature
+

Optional[Any]

+
+
+

steps.cell_convert.mapping (property)

+

Mapping to apply to the column

+
Signature
+

Optional[Dict[str, Any]]

+
+
+

steps.cell_convert.function (property)

+

Function to apply to the column

+
Signature
+

Optional[Any]

+
+
+

steps.cell_convert.field_name (property)

+

Name of the field to apply the transform on

+
Signature
+

Optional[str]

+
+ + + + +
+
+

Fill Cells

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.cell_replace(pattern="france", replace=None),
+        steps.cell_fill(field_name="name", value="FRANCE"),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'}]}
++----+-----------+------------+
+| id | name      | population |
++====+===========+============+
+|  1 | 'germany' |         83 |
++----+-----------+------------+
+|  2 | 'FRANCE'  |         66 |
++----+-----------+------------+
+|  3 | 'spain'   |         47 |
++----+-----------+------------+
+ +
+

Reference

steps.cell_fill (class)

Fill cell. Replaces missing values with non-missing values from the adjacent row/column.

Signature

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, value: Optional[Any] = None, field_name: Optional[str] = None, direction: Optional[str] = None) -> None

Parameters

  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • value (Optional[Any])
  • field_name (Optional[str])
  • direction (Optional[str])
+
+ +
+

steps.cell_fill.value (property)

+

Value to set in the field's cells that have a missing value.

+
Signature
+

Optional[Any]

+
+
+

steps.cell_fill.field_name (property)

+

Name of the field to replace the missing value cells

+
Signature
+

Optional[str]

+
+
+

steps.cell_fill.direction (property)

+

Direction to read the non-missing value from (left/right/above).

+
Signature
+

Optional[str]

+
+ + + + +
+
+

Format Cells

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.cell_format(template="Prefix: {0}", field_name="name"),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'}]}
++----+-------------------+------------+
+| id | name              | population |
++====+===================+============+
+|  1 | 'Prefix: germany' |         83 |
++----+-------------------+------------+
+|  2 | 'Prefix: france'  |         66 |
++----+-------------------+------------+
+|  3 | 'Prefix: spain'   |         47 |
++----+-------------------+------------+
+ +
+

Reference

steps.cell_format (class)

Format cell. Formats all values in the given or all string fields using the `template` format string.

Signature

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, template: str, field_name: Optional[str] = None) -> None

Parameters

  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • template (str)
  • field_name (Optional[str])
+
+ +
+

steps.cell_format.template (property)

+

format string to apply to cells

+
Signature
+

str

+
+
+

steps.cell_format.field_name (property)

+

field name to apply template format

+
Signature
+

Optional[str]

+
+ + + + +
+
+

Interpolate Cells

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.cell_interpolate(template="Prefix: %s", field_name="name"),
+    ]
+)
+pprint(target.schema)
+pprint(target.read_rows())
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'}]}
+[{'id': 1, 'name': 'Prefix: germany', 'population': 83},
+ {'id': 2, 'name': 'Prefix: france', 'population': 66},
+ {'id': 3, 'name': 'Prefix: spain', 'population': 47}]
+ +
+

Reference

steps.cell_interpolate (class)

Interpolate cell. Interpolates all values in a given or all string fields using the `template` string.

Signature

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, template: str, field_name: Optional[str] = None) -> None

Parameters

  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • template (str)
  • field_name (Optional[str])
+
+ +
+

steps.cell_interpolate.template (property)

+

template string to apply to the field cells

+
Signature
+

str

+
+
+

steps.cell_interpolate.field_name (property)

+

field name to apply template string

+
Signature
+

Optional[str]

+
+ + + + +
+
+

Replace Cells

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.cell_replace(pattern="france", replace="FRANCE"),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'}]}
++----+-----------+------------+
+| id | name      | population |
++====+===========+============+
+|  1 | 'germany' |         83 |
++----+-----------+------------+
+|  2 | 'FRANCE'  |         66 |
++----+-----------+------------+
+|  3 | 'spain'   |         47 |
++----+-----------+------------+
+ +
Reference

steps.cell_replace (class)

Replace cell. Replaces cell values in a given field or all fields using a user-defined pattern.

Signature

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, pattern: str, replace: str, field_name: Optional[str] = None) -> None

Parameters

  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • pattern (str)
  • replace (str)
  • field_name (Optional[str])
+
+ +
+

steps.cell_replace.pattern (property)

+

Pattern to search for, in a single field or in all fields.

+
Signature
+

str

+
+
+

steps.cell_replace.replace (property)

+

String to replace the matched pattern with.

+
Signature
+

str

+
+
+

steps.cell_replace.field_name (property)

+

Field name to apply the replacement to.

+
Signature
+

Optional[str]

+
+ + + + +
+
+

Set Cells

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+          steps.cell_set(field_name="population", value=100),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'}]}
++----+-----------+------------+
+| id | name      | population |
++====+===========+============+
+|  1 | 'germany' |        100 |
++----+-----------+------------+
+|  2 | 'france'  |        100 |
++----+-----------+------------+
+|  3 | 'spain'   |        100 |
++----+-----------+------------+
+ +
+

Reference

steps.cell_set (class)

Set cell

Signature

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, value: Any, field_name: str) -> None

Parameters

  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • value (Any)
  • field_name (str)
+
+ +
+

steps.cell_set.value (property)

+

+ Value to be set in cell of the given field. +

+
Signature
+

Any

+
+
+

steps.cell_set.field_name (property)

+

+ Specifies the field name where to set/replace the value. +

+
Signature
+

str

+

Field Steps

+

The Field steps are responsible for managing the fields of a Table Schema. You can add or remove fields, along with more complex operations like unpacking.

+

Add Field

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.field_add(name="note", value="eu", descriptor={"type": "string"}),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'},
+            {'name': 'note', 'type': 'string'}]}
++----+-----------+------------+------+
+| id | name      | population | note |
++====+===========+============+======+
+|  1 | 'germany' |         83 | 'eu' |
++----+-----------+------------+------+
+|  2 | 'france'  |         66 | 'eu' |
++----+-----------+------------+------+
+|  3 | 'spain'   |         47 | 'eu' |
++----+-----------+------------+------+
+ +
+
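The new field's value can also be computed instead of being constant. A minimal sketch using the formula parameter (processed with the simpleeval library, per the reference below; we assume the formula can reference other fields of the row, and normalize the table first so it sees parsed values):

from frictionless import Resource, transform, steps

source = Resource(path="transform.csv")
target = transform(
    source,
    steps=[
        # normalize first so the formula sees typed (parsed) values
        steps.table_normalize(),
        # hypothetical derived field: double of the population column
        steps.field_add(name="double", formula="population*2", descriptor={"type": "integer"}),
    ],
)
print(target.to_view())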

Reference

steps.field_add (class)

Add field. This step can be added using the `steps` parameter for the `transform` function.

Signature

(*, title: Optional[str] = None, description: Optional[str] = None, name: str, value: Optional[Any] = None, formula: Optional[Any] = None, function: Optional[Any] = None, position: Optional[int] = None, descriptor: Optional[types.IDescriptor] = None, incremental: bool = False) -> None

Parameters

  • title (Optional[str])
  • description (Optional[str])
  • name (str)
  • value (Optional[Any])
  • formula (Optional[Any])
  • function (Optional[Any])
  • position (Optional[int])
  • descriptor (Optional[types.IDescriptor])
  • incremental (bool)
+
+ +
+

steps.field_add.name (property)

+

+ A human-oriented name for the field. +

+
Signature
+

str

+
+
+

steps.field_add.value (property)

+

+ Specifies value for the field. +

+
Signature
+

Optional[Any]

+
+
+

steps.field_add.formula (property)

+

+ Evaluatable expressions to set the value for the field. The expressions are + processed using simpleeval library. +

+
Signature
+

Optional[Any]

+
+
+

steps.field_add.function (property)

+

+ Python function to set the value for the field. +

+
Signature
+

Optional[Any]

+
+
+

steps.field_add.position (property)

+

+ Position index where to add the field. For example, to + add the field in second position, we need to set it as 'position=2'. +

+
Signature
+

Optional[int]

+
+
+

steps.field_add.descriptor (property)

+

+ A dictionary, which contains metadata for the field which + describes the properties of the field. +

+
Signature
+

Optional[types.IDescriptor]

+
+
+

steps.field_add.incremental (property)

+

+ Indicates if it is an incremental value. If True, the sequential value is set + to the new field. The default value is false. +

+
Signature
+

bool

+
+ + + + +
+
+

Filter Fields

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.field_filter(names=["id", "name"]),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'}]}
++----+-----------+
+| id | name      |
++====+===========+
+|  1 | 'germany' |
++----+-----------+
+|  2 | 'france'  |
++----+-----------+
+|  3 | 'spain'   |
++----+-----------+
+ +
+

Reference

steps.field_filter (class)

Filter fields. This step can be added using the `steps` parameter for the `transform` function.

Signature

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, names: List[str]) -> None

Parameters

  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • names (List[str])
+
+ +
+

steps.field_filter.names (property)

+

Names of the fields to be kept. Other fields will be ignored.

+
Signature
+

List[str]

+
+ + + + +
+
+

Merge Fields

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+     source,
+     steps=[
         # separator argument can be used to set the delimiter. Default value is '-'
         # preserve argument keeps the original fields
+         steps.field_merge(name="details", from_names=["name", "population"], preserve=True)
+     ],
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'},
+            {'name': 'details', 'type': 'string'}]}
++----+-----------+------------+--------------+
+| id | name      | population | details      |
++====+===========+============+==============+
+|  1 | 'germany' |         83 | 'germany-83' |
++----+-----------+------------+--------------+
+|  2 | 'france'  |         66 | 'france-66'  |
++----+-----------+------------+--------------+
+|  3 | 'spain'   |         47 | 'spain-47'   |
++----+-----------+------------+--------------+
+ +
+

Reference

steps.field_merge (class)

Merge fields. This step can be added using the `steps` parameter for the `transform` function.

Signature

(*, title: Optional[str] = None, description: Optional[str] = None, name: str, from_names: List[str], separator: str = -, preserve: bool = False) -> None

Parameters

  • title (Optional[str])
  • description (Optional[str])
  • name (str)
  • from_names (List[str])
  • separator (str)
  • preserve (bool)
+
+ +
+

steps.field_merge.name (property)

+

+ Name of the new field that will be created after merge. +

+
Signature
+

str

+
+
+

steps.field_merge.from_names (property)

+

+ List of field names to merge. +

+
Signature
+

List[str]

+
+
+

steps.field_merge.separator (property)

+

Separator to use while merging values of the fields.

+
Signature
+

str

+
+
+

steps.field_merge.preserve (property)

+

+ It indicates if the fields are preserved or not after merging. If True, + fields will not be removed and vice versa. +

+
Signature
+

bool

+
+ + + + +
+
+

Move Field

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.field_move(name="id", position=3),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'},
+            {'name': 'id', 'type': 'integer'}]}
++-----------+------------+----+
+| name      | population | id |
++===========+============+====+
+| 'germany' |         83 |  1 |
++-----------+------------+----+
+| 'france'  |         66 |  2 |
++-----------+------------+----+
+| 'spain'   |         47 |  3 |
++-----------+------------+----+
+ +
+

Reference

steps.field_move (class)

Move field. This step can be added using the `steps` parameter for the `transform` function.

Signature

(*, title: Optional[str] = None, description: Optional[str] = None, name: str, position: int) -> None

Parameters

  • title (Optional[str])
  • description (Optional[str])
  • name (str)
  • position (int)
+
+ +
+

steps.field_move.name (property)

+

+ Field name to move. +

+
Signature
+

str

+
+
+

steps.field_move.position (property)

+

+ New position for the field being moved. +

+
Signature
+

int

+
+ + + + +
+
+

Pack Fields

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
        # as_object=True packs the fields into a JSON object (by default they are packed into an array)
        # preserve argument keeps the original fields
+        steps.field_pack(name="details", from_names=["name", "population"], as_object=True, preserve=True)
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'},
+            {'name': 'details', 'type': 'object'}]}
++----+-----------+------------+-----------------------------------------+
+| id | name      | population | details                                 |
++====+===========+============+=========================================+
+|  1 | 'germany' |         83 | {'name': 'germany', 'population': '83'} |
++----+-----------+------------+-----------------------------------------+
+|  2 | 'france'  |         66 | {'name': 'france', 'population': '66'}  |
++----+-----------+------------+-----------------------------------------+
+|  3 | 'spain'   |         47 | {'name': 'spain', 'population': '47'}   |
++----+-----------+------------+-----------------------------------------+
+ +
+

Reference

steps.field_pack (class)

Pack fields. This step can be added using the `steps` parameter for the `transform` function.

Signature

(*, title: Optional[str] = None, description: Optional[str] = None, name: str, from_names: List[str], as_object: bool = False, preserve: bool = False) -> None

Parameters

  • title (Optional[str])
  • description (Optional[str])
  • name (str)
  • from_names (List[str])
  • as_object (bool)
  • preserve (bool)
+
+ +
+

steps.field_pack.name (property)

+

+ Name of the new field. +

+
Signature
+

str

+
+
+

steps.field_pack.from_names (property)

+

+ List of fields to be packed. +

+
Signature
+

List[str]

+
+
+

steps.field_pack.as_object (property)

+

+ The packed value of the field will be stored as object if set to + True. +

+
Signature
+

bool

+
+
+

steps.field_pack.preserve (property)

+

+ Specifies if the field should be preserved or not. If True, fields + part of packing process will be preserved. +

+
Signature
+

bool

+
+ + + + +
+
+

Remove Field

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.field_remove(names=["id"]),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'}]}
++-----------+------------+
+| name      | population |
++===========+============+
+| 'germany' |         83 |
++-----------+------------+
+| 'france'  |         66 |
++-----------+------------+
+| 'spain'   |         47 |
++-----------+------------+
+ +
+

Reference

steps.field_remove (class)

Remove field. This step can be added using the `steps` parameter for the `transform` function.

Signature

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, names: List[str]) -> None

Parameters

  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • names (List[str])
+
+ +
+

steps.field_remove.names (property)

+

+ List of fields to remove. +

+
Signature
+

List[str]

+
+ + + + +
+
+

Split Field

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.field_split(name="name", to_names=["name1", "name2"], pattern="a"),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'population', 'type': 'integer'},
+            {'name': 'name1', 'type': 'string'},
+            {'name': 'name2', 'type': 'string'}]}
++----+------------+--------+-------+
+| id | population | name1  | name2 |
++====+============+========+=======+
+|  1 |         83 | 'germ' | 'ny'  |
++----+------------+--------+-------+
+|  2 |         66 | 'fr'   | 'nce' |
++----+------------+--------+-------+
+|  3 |         47 | 'sp'   | 'in'  |
++----+------------+--------+-------+
+ +
+

Reference

steps.field_split (class)

Split field. This step can be added using the `steps` parameter for the `transform` function.

Signature

(*, title: Optional[str] = None, description: Optional[str] = None, name: str, to_names: List[str], pattern: str, preserve: bool = False) -> None

Parameters

  • title (Optional[str])
  • description (Optional[str])
  • name (str)
  • to_names (List[str])
  • pattern (str)
  • preserve (bool)
+
+ +
+

steps.field_split.name (property)

+

+ Name of the field to split. +

+
Signature
+

str

+
+
+

steps.field_split.to_names (property)

+

+ List of names of new fields. +

+
Signature
+

List[str]

+
+
+

steps.field_split.pattern (property)

+

+ Pattern to split the field value, for example: "a". +

+
Signature
+

str

+
+
+

steps.field_split.preserve (property)

+

+ Whether to preserve the fields after the split. If True, + the fields are not removed after split. +

+
Signature
+

bool

+
+ + + + +
+
+

Unpack Field

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.field_update(name="id", value=[1, 1], descriptor={"type": "string"}),
+        steps.field_unpack(name="id", to_names=["id2", "id3"]),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'},
+            {'name': 'id2', 'type': 'any'},
+            {'name': 'id3', 'type': 'any'}]}
++-----------+------------+-----+-----+
+| name      | population | id2 | id3 |
++===========+============+=====+=====+
+| 'germany' |         83 |   1 |   1 |
++-----------+------------+-----+-----+
+| 'france'  |         66 |   1 |   1 |
++-----------+------------+-----+-----+
+| 'spain'   |         47 |   1 |   1 |
++-----------+------------+-----+-----+
+ +
+

Reference

steps.field_unpack (class)

Unpack field. This step can be added using the `steps` parameter for the `transform` function.

Signature

(*, title: Optional[str] = None, description: Optional[str] = None, name: str, to_names: List[str], preserve: bool = False) -> None

Parameters

  • title (Optional[str])
  • description (Optional[str])
  • name (str)
  • to_names (List[str])
  • preserve (bool)
+
+ +
+

steps.field_unpack.name (property)

+

+ Name of the field to unpack. +

+
Signature
+

str

+
+
+

steps.field_unpack.to_names (property)

+

+ List of names for new fields that will be created + after unpacking. +

+
Signature
+

List[str]

+
+
+

steps.field_unpack.preserve (property)

+

+ Whether to preserve the source fields after unpacking. +

+
Signature
+

bool

+
+ + + + +
+
+

Update Field

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.field_update(name="id", value=str, descriptor={"type": "string"}),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'id', 'type': 'string'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'}]}
++------+-----------+------------+
+| id   | name      | population |
++======+===========+============+
+| None | 'germany' |         83 |
++------+-----------+------------+
+| None | 'france'  |         66 |
++------+-----------+------------+
+| None | 'spain'   |         47 |
++------+-----------+------------+
+ +
+

Reference

steps.field_update (class)

Update field. This step can be added using the `steps` parameter for the `transform` function.

Signature

(*, title: Optional[str] = None, description: Optional[str] = None, name: str, value: Optional[Any] = None, formula: Optional[Any] = None, function: Optional[Any] = None, descriptor: Optional[types.IDescriptor] = None) -> None

Parameters

  • title (Optional[str])
  • description (Optional[str])
  • name (str)
  • value (Optional[Any])
  • formula (Optional[Any])
  • function (Optional[Any])
  • descriptor (Optional[types.IDescriptor])
+
+ +
+

steps.field_update.name (property)

+

+ Name of the field to update. +

+
Signature
+

str

+
+
+

steps.field_update.value (property)

+

+ Cell value to set for the field. +

+
Signature
+

Optional[Any]

+
+
+

steps.field_update.formula (property)

+

+ Evaluatable expressions to set the value for the field. The expressions + are processed using simpleeval library. +

+
Signature
+

Optional[Any]

+
+
+

steps.field_update.function (property)

+

+ Python function to set the value for the field. +

+
Signature
+

Optional[Any]

+
+
+

steps.field_update.descriptor (property)

+

+ A descriptor for the field to set the metadata. +

+
Signature
+

Optional[types.IDescriptor]

+
+ + + + +

Resource Steps

+

The Resource steps are only available for a package transformation (except for steps.resource_update, which is also available for standalone resources). This includes basic resource management operations like adding or removing resources, along with the hierarchical resource_transform step.

+

Add Resource

+

This step adds a new resource to a data package.

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Package(resources=[Resource(name='main', path="transform.csv")])
+target = transform(
+    source,
+    steps=[
+        steps.resource_add(name='extra', descriptor={'path': 'transform.csv'}),
+    ],
+)
+print(target.resource_names)
+print(target.get_resource('extra').schema)
+print(target.get_resource('extra').to_view())
+
+ +
['main', 'extra']
+{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'}]}
++----+-----------+------------+
+| id | name      | population |
++====+===========+============+
+|  1 | 'germany' |         83 |
++----+-----------+------------+
+|  2 | 'france'  |         66 |
++----+-----------+------------+
+|  3 | 'spain'   |         47 |
++----+-----------+------------+
+ +
+

Reference

steps.resource_add (class)

Add resource. This step can be added using the `steps` parameter for the `transform` function.

Signature

(*, title: Optional[str] = None, description: Optional[str] = None, name: str, descriptor: Dict[str, Any]) -> None

Parameters

  • title (Optional[str])
  • description (Optional[str])
  • name (str)
  • descriptor (Dict[str, Any])
+
+ +
+

steps.resource_add.name (property)

+

+ Name of the resource to add. +

+
Signature
+

str

+
+
+

steps.resource_add.descriptor (property)

+

+ A descriptor for the resource. +

+
Signature
+

Dict[str, Any]

+
+ + + + +
+
+

Remove Resource

+

This step removes an existing resource from a data package.

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Package(resources=[Resource(name='main', path="transform.csv")])
+target = transform(
+    source,
+    steps=[
+        steps.resource_remove(name='main'),
+    ],
+)
+print(target)
+
+ +
{'resources': []}
+ +
+

Reference

steps.resource_remove (class)

Remove resource. This step can be added using the `steps` parameter for the `transform` function.

Signature

(*, title: Optional[str] = None, description: Optional[str] = None, name: str) -> None

Parameters

  • title (Optional[str])
  • description (Optional[str])
  • name (str)
+
+ +
+

steps.resource_remove.name (property)

+

+ Name of the resource to remove. +

+
Signature
+

str

+
+ + + + +
+
+

Transform Resource

+

It's a hierarchical step that allows transforming a data package's resource. Any resource steps can be used as part of this package step.

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Package(resources=[Resource(name='main', path="transform.csv")])
+target = transform(
+    source,
+    steps=[
+        steps.resource_transform(name='main', steps=[
+            steps.row_sort(field_names=['name'])
+        ]),
+    ],
+)
+print(target.resource_names)
+print(target.get_resource('main').schema)
+print(target.get_resource('main').to_view())
+
+ +
['main']
+{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'}]}
++----+-----------+------------+
+| id | name      | population |
++====+===========+============+
+|  2 | 'france'  |         66 |
++----+-----------+------------+
+|  1 | 'germany' |         83 |
++----+-----------+------------+
+|  3 | 'spain'   |         47 |
++----+-----------+------------+
+ +
+

Reference

+
+ + +
+
+ +

steps.resource_transform (class)

+

Transform resource. This step can be added using the `steps` parameter for the `transform` function.

+
Signature
+

(*, title: Optional[str] = None, description: Optional[str] = None, name: str, steps: List[Step]) -> None

+
Parameters
+
    +
  • title (Optional[str])
  • description (Optional[str])
  • name (str)
  • steps (List[Step])
+
+ +
+

steps.resource_transform.name (property)

+

Name of the resource to transform.

+
Signature
+

str

+
+
+

steps.resource_transform.steps (property)

+

List of transformation steps to apply to the given resource.

+
Signature
+

List[Step]

+
+ + + + +
+
+

Update Resource

+

This step updates a resource's metadata. It can be used in both resource and package transformations.

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Package(resources=[Resource(name='main', path="transform.csv")])
+target = transform(
+    source,
+    steps=[
+        steps.resource_update(
+          name='main',
+          descriptor={'title': 'Main Resource', 'description': 'For the docs'}
+        ),
+    ],
+)
+print(target.get_resource('main'))
+
+ +
{'name': 'main',
+ 'type': 'table',
+ 'title': 'Main Resource',
+ 'description': 'For the docs',
+ 'path': 'transform.csv',
+ 'scheme': 'file',
+ 'format': 'csv',
+ 'mediatype': 'text/csv'}
+ +
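When transforming a single resource rather than a package, the name argument can apparently be omitted (it is Optional in the signature below), in which case the update applies to the resource being transformed. A hedged sketch of that usage:

from frictionless import Resource, transform, steps

source = Resource(path="transform.csv")
target = transform(
    source,
    steps=[
        # no name given: the update is assumed to apply to the current resource
        steps.resource_update(descriptor={'title': 'Main Resource'}),
    ],
)
print(target.title)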
+

Reference

+
+ + +
+
+ +

steps.resource_update (class)

+

Update resource. This step can be added using the `steps` parameter for the `transform` function.

+
Signature
+

(*, title: Optional[str] = None, description: Optional[str] = None, name: Optional[str] = None, descriptor: types.IDescriptor) -> None

+
Parameters
+
    +
  • title (Optional[str])
  • description (Optional[str])
  • name (Optional[str])
  • descriptor (types.IDescriptor)
+
+ +
+

steps.resource_update.name (property)

+

Name of the resource to update.

+
Signature
+

Optional[str]

+
+
+

steps.resource_update.descriptor (property)

+

A new descriptor used to update the resource's metadata.

+
Signature
+

types.IDescriptor

+

Row Steps

+

These steps operate on rows and include row filtering, slicing, sorting, and more.

+

Filter Rows

+

This step filters rows based on a provided formula or function.

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.table_normalize(),
+        steps.row_filter(formula="id > 1"),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'}]}
++----+----------+------------+
+| id | name     | population |
++====+==========+============+
+|  2 | 'france' |         66 |
++----+----------+------------+
+|  3 | 'spain'  |         47 |
++----+----------+------------+
+ +
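The same filtering can be expressed with a plain Python function instead of a formula; a minimal sketch assuming the same transform.csv as above:

from frictionless import Resource, transform, steps

source = Resource(path="transform.csv")
target = transform(
    source,
    steps=[
        steps.table_normalize(),
        # keep only the rows whose id is greater than 1
        steps.row_filter(function=lambda row: row["id"] > 1),
    ]
)
print(target.to_view())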
+

Reference

+
+ + +
+
+ +

steps.row_filter (class)

+

Filter rows. This step can be added using the `steps` parameter for the `transform` function.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, formula: Optional[Any] = None, function: Optional[Any] = None) -> None

+
Parameters
+
    +
  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • formula (Optional[Any])
  • function (Optional[Any])
+
+ +
+

steps.row_filter.formula (property)

+

An evaluatable expression used to filter the rows. Rows that match the formula are kept and the others are ignored. The expressions are processed using the simpleeval library.

+
Signature
+

Optional[Any]

+
+
+

steps.row_filter.function (property)

+

A Python function used to filter the rows.

+
Signature
+

Optional[Any]

+
+ + + + +
+
+

Search Rows

This step searches for rows matching a regular expression, optionally within a single field.

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.row_search(regex=r"^f.*"),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'}]}
++----+----------+------------+
+| id | name     | population |
++====+==========+============+
+|  2 | 'france' |         66 |
++----+----------+------------+
+ +
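The search can also be restricted to a single field and inverted; a minimal sketch assuming the same file:

from frictionless import Resource, transform, steps

source = Resource(path="transform.csv")
target = transform(
    source,
    steps=[
        # drop the rows whose name matches the pattern
        steps.row_search(regex=r"^f.*", field_name="name", negate=True),
    ]
)
print(target.to_view())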
+

Reference


steps.row_search (class)

Search rows. This step can be added using the `steps` parameter for the `transform` function.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, regex: str, field_name: Optional[str] = None, negate: bool = False) -> None

+
Parameters
+
    +
  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • regex (str)
  • field_name (Optional[str])
  • negate (bool)
+
+ +
+

steps.row_search.regex (property)

+

Regex pattern used to search the rows. If field_name is set, it is applied only to the specified field. For example, regex=r"^e.*".

+
Signature
+

str

+
+
+

steps.row_search.field_name (property)

+

Name of the field to search in.

+
Signature
+

Optional[str]

+
+
+

steps.row_search.negate (property)

+

Whether to invert the result. If True, only the rows that do not match the pattern are returned.

+
Signature
+

bool

+
+ + + + +
+
+

Slice Rows

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.row_slice(head=2),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'}]}
++----+-----------+------------+
+| id | name      | population |
++====+===========+============+
+|  1 | 'germany' |         83 |
++----+-----------+------------+
+|  2 | 'france'  |         66 |
++----+-----------+------------+
+ +
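Besides head and tail, slicing supports start/stop positions (and a step size), mirroring Python's slicing semantics; a sketch under the same assumptions:

from frictionless import Resource, transform, steps

source = Resource(path="transform.csv")
target = transform(
    source,
    steps=[
        # skip the first row and read up to, but not including, the third
        steps.row_slice(start=1, stop=3),
    ]
)
print(target.to_view())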
+

Reference

+
+ + +
+
+ +

steps.row_slice (class)

+

Slice rows. This step can be added using the `steps` parameter for the `transform` function.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, start: Optional[int] = None, stop: Optional[int] = None, step: Optional[int] = None, head: Optional[int] = None, tail: Optional[int] = None) -> None

+
Parameters
+
    +
  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • start (Optional[int])
  • stop (Optional[int])
  • step (Optional[int])
  • head (Optional[int])
  • tail (Optional[int])
+
+ +
+

steps.row_slice.start (property)

+

Starting point from which to read the rows. If None, reading starts from the beginning.

+
Signature
+

Optional[int]

+
+
+

steps.row_slice.stop (property)

+

Stopping point for reading the rows. If None, reading continues to the end.

+
Signature
+

Optional[int]

+
+
+

steps.row_slice.step (property)

+

Step size used to read the next row. If None, it defaults to 1.

+
Signature
+

Optional[int]

+
+
+

steps.row_slice.head (property)

+

Number of rows to read from the head.

+
Signature
+

Optional[int]

+
+
+

steps.row_slice.tail (property)

+

Number of rows to read from the bottom.

+
Signature
+

Optional[int]

+
+ + + + +
+
+

Sort Rows

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.row_sort(field_names=["name"]),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'}]}
++----+-----------+------------+
+| id | name      | population |
++====+===========+============+
+|  2 | 'france'  |         66 |
++----+-----------+------------+
+|  1 | 'germany' |         83 |
++----+-----------+------------+
+|  3 | 'spain'   |         47 |
++----+-----------+------------+
+ +
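A descending sort only requires reverse=True; a sketch assuming the same file:

from frictionless import Resource, transform, steps

source = Resource(path="transform.csv")
target = transform(
    source,
    steps=[
        # normalize first so population is compared as a number, not a string
        steps.table_normalize(),
        steps.row_sort(field_names=["population"], reverse=True),
    ]
)
print(target.to_view())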
+

Reference

+
+ + +
+
+ +

steps.row_sort (class)

+

Sort rows. This step can be added using the `steps` parameter for the `transform` function.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, field_names: List[str], reverse: bool = False) -> None

+
Parameters
+
    +
  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • field_names (List[str])
  • reverse (bool)
+
+ +
+

steps.row_sort.field_names (property)

+

List of field names by which the rows will be sorted. If more than one field is given, the sort is applied from left to right.

+
Signature
+

List[str]

+
+
+

steps.row_sort.reverse (property)

+

If set to True, the sort order is reversed.

+
Signature
+

bool

+
+ + + + +
+
+

Split Rows

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.row_split(field_name="name", pattern="a"),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'}]}
++----+--------+------------+
+| id | name   | population |
++====+========+============+
+|  1 | 'germ' |         83 |
++----+--------+------------+
+|  1 | 'ny'   |         83 |
++----+--------+------------+
+|  2 | 'fr'   |         66 |
++----+--------+------------+
+|  2 | 'nce'  |         66 |
++----+--------+------------+
+|  3 | 'sp'   |         47 |
++----+--------+------------+
+...
+ +
+

Reference

+
+ + +
+
+ +

steps.row_split (class)

+

Split rows. This step can be added using the `steps` parameter for the `transform` function.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, pattern: str, field_name: str) -> None

+
Parameters
+
    +
  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • pattern (str)
  • field_name (str)
+
+ +
+

steps.row_split.pattern (property)

+

Pattern used to split the cell values of the field.

+
Signature
+

str

+
+
+

steps.row_split.field_name (property)

+

Name of the field whose cell values will be split.

+
Signature
+

str

+
+ + + + +
+
+

Subset Rows

This step selects a subset of rows, for example distinct, duplicated, or conflicting rows.

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.field_update(name="id", value=1),
+        steps.row_subset(subset="conflicts", field_name="id"),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'}]}
++----+-----------+------------+
+| id | name      | population |
++====+===========+============+
+|  1 | 'germany' |         83 |
++----+-----------+------------+
+|  1 | 'france'  |         66 |
++----+-----------+------------+
+|  1 | 'spain'   |         47 |
++----+-----------+------------+
+ +
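The other subset values follow the same shape; for example, "distinct" keeps the rows with distinct values in the given field. A sketch:

from frictionless import Resource, transform, steps

source = Resource(path="transform.csv")
target = transform(
    source,
    steps=[
        steps.table_normalize(),
        # keep the rows with distinct values in the "id" field
        steps.row_subset(subset="distinct", field_name="id"),
    ]
)
print(target.to_view())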
+

Reference

+
+ + +
+
+ +

steps.row_subset (class)

+

Subset rows. This step can be added using the `steps` parameter for the `transform` function.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, subset: str, field_name: Optional[str] = None) -> None

+
Parameters
+
    +
  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • subset (str)
  • field_name (Optional[str])
+
+ +
+

steps.row_subset.subset (property)

+

Type of subset to select. It can take the values "conflicts", "distinct", "duplicates" and "unique".

+
Signature
+

str

+
+
+

steps.row_subset.field_name (property)

+

Name of the field to which the subset function will be applied.

+
Signature
+

Optional[str]

+
+ + + + +
+
+

Ungroup Rows

This step groups rows by a field and keeps a single row per group based on the given selection.

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform-groups.csv")
+target = transform(
+    source,
+    steps=[
+        steps.row_ungroup(group_name="name", selection="first"),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'},
+            {'name': 'year', 'type': 'integer'}]}
++----+-----------+------------+------+
+| id | name      | population | year |
++====+===========+============+======+
+|  3 | 'france'  |         66 | 2020 |
++----+-----------+------------+------+
+|  1 | 'germany' |         83 | 2020 |
++----+-----------+------------+------+
+|  5 | 'spain'   |         47 | 2020 |
++----+-----------+------------+------+
+ +
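When selection is "min" or "max", value_name names the field whose minimum or maximum picks the row for each group; a sketch assuming the same transform-groups.csv:

from frictionless import Resource, transform, steps

source = Resource(path="transform-groups.csv")
target = transform(
    source,
    steps=[
        # keep, for each name, the row with the largest population
        steps.row_ungroup(group_name="name", selection="max", value_name="population"),
    ]
)
print(target.to_view())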
+

Reference

+
+ + +
+
+ +

steps.row_ungroup (class)

+

Ungroup rows. This step can be added using the `steps` parameter for the `transform` function.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, selection: str, group_name: str, value_name: Optional[str] = None) -> None

+
Parameters
+
    +
  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • selection (str)
  • group_name (str)
  • value_name (Optional[str])
+
+ +
+

steps.row_ungroup.selection (property)

+

Specifies which row to keep from each group. The value can be "first", "last", "min" or "max".

+
Signature
+

str

+
+
+

steps.row_ungroup.group_name (property)

+

Name of the field by which the rows are grouped. The first or last row of each group is returned according to 'selection'.

+
Signature
+

str

+
+
+

steps.row_ungroup.value_name (property)

+

If selection is set to "min" or "max", the rows are grouped by the "group_name" field and the minimum or maximum value is then selected from the "value_name" field.

+
Signature
+

Optional[str]

+

Table Steps

+

These steps operate at the table level of a resource. They cover a range of operations, from simple validation or writing to disk to complex reshaping such as pivoting or melting.

+

Aggregate Table

+

This step groups rows by the given group_name and then applies the aggregation functions provided in the aggregation dictionary (see the example below).

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform-groups.csv")
+target = transform(
+    source,
+    steps=[
+        steps.table_normalize(),
+        steps.table_aggregate(
+            group_name="name", aggregation={"sum": ("population", sum)}
+        ),
+    ],
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'name', 'type': 'string'}, {'name': 'sum', 'type': 'any'}]}
++-----------+-----+
+| name      | sum |
++===========+=====+
+| 'france'  | 120 |
++-----------+-----+
+| 'germany' | 160 |
++-----------+-----+
+| 'spain'   |  80 |
++-----------+-----+
+ +
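The aggregation dictionary can define several output fields at once, each mapping a new field name to a (source_field, function) pair; a sketch following the pattern above:

from frictionless import Resource, transform, steps

source = Resource(path="transform-groups.csv")
target = transform(
    source,
    steps=[
        steps.table_normalize(),
        steps.table_aggregate(
            group_name="name",
            # each key becomes a field in the output table
            aggregation={"min": ("population", min), "max": ("population", max)},
        ),
    ],
)
print(target.to_view())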
+

Reference

+
+ + +
+
+ +

steps.table_aggregate (class)

+

Aggregate table. This step can be added using the `steps` parameter for the `transform` function.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, aggregation: Dict[str, Any], group_name: str) -> None

+
Parameters
+
    +
  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • aggregation (Dict[str, Any])
  • group_name (str)
+
+ +
+

steps.table_aggregate.aggregation (property)

+

A dictionary of aggregation functions. The functions can be max, min, len, sum, and so on.

+
Signature
+

Dict[str, Any]

+
+
+

steps.table_aggregate.group_name (property)

+

Field by which the rows will be grouped.

+
Signature
+

str

+
+ + + + +
+
+

Attach Table

This step attaches the columns of another resource to the current table.

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+      steps.table_attach(resource=Resource(data=[["note"], ["large"], ["mid"]])),
+    ],
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'},
+            {'name': 'note', 'type': 'string'}]}
++----+-----------+------------+---------+
+| id | name      | population | note    |
++====+===========+============+=========+
+|  1 | 'germany' |         83 | 'large' |
++----+-----------+------------+---------+
+|  2 | 'france'  |         66 | 'mid'   |
++----+-----------+------------+---------+
+|  3 | 'spain'   |         47 | None    |
++----+-----------+------------+---------+
+ +
+

Reference

+
+ + +
+
+ +

steps.table_attach (class)

+

Attach table. This step can be added using the `steps` parameter for the `transform` function.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, resource: Union[Resource, str]) -> None

+
Parameters
+
    +
  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • resource (Union[Resource, str])
+
+ +
+

steps.table_attach.resource (property)

+

Data resource to attach to the existing table.

+
Signature
+

Union[Resource, str]

+
+ + + + +
+
+

Debug Table

This step calls a debug function on each row as the table is processed.

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+      steps.table_debug(function=print),
+    ],
+)
+print(target.to_view())
+
+ +
{'id': 1, 'name': 'germany', 'population': 83}
+{'id': 2, 'name': 'france', 'population': 66}
+{'id': 3, 'name': 'spain', 'population': 47}
++----+-----------+------------+
+| id | name      | population |
++====+===========+============+
+|  1 | 'germany' |         83 |
++----+-----------+------------+
+|  2 | 'france'  |         66 |
++----+-----------+------------+
+|  3 | 'spain'   |         47 |
++----+-----------+------------+
+ +
+

Reference

+
+ + +
+
+ +

steps.table_debug (class)

+

Debug table. This step can be added using the `steps` parameter for the `transform` function.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, function: Any) -> None

+
Parameters
+
    +
  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • function (Any)
+
+ +
+

steps.table_debug.function (property)

+

Debug function applied to each table row.

+
Signature
+

Any

+
+ + + + +
+
+

Diff Tables

This step keeps the rows of the current table that are not present in the provided resource.

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.table_normalize(),
+        steps.table_diff(
+            resource=Resource(
+                data=[
+                    ["id", "name", "population"],
+                    [1, "germany", 83],
+                    [2, "france", 50],
+                    [3, "spain", 47],
+                ]
+            )
+        ),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'}]}
++----+----------+------------+
+| id | name     | population |
++====+==========+============+
+|  2 | 'france' |         66 |
++----+----------+------------+
+ +
+

Reference

+
+ + +
+
+ +

steps.table_diff (class)

+

Diff tables. This step can be added using the `steps` parameter for the `transform` function.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, resource: Union[Resource, str], ignore_order: bool = False, use_hash: bool = False) -> None

+
Parameters
+
    +
  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • resource (Union[Resource, str])
  • ignore_order (bool)
  • use_hash (bool)
+
+ +
+

steps.table_diff.resource (property)

+

Resource with which to compare.

+
Signature
+

Union[Resource, str]

+
+
+

steps.table_diff.ignore_order (property)

+

Specifies whether to ignore the order of the rows.

+
Signature
+

bool

+
+
+

steps.table_diff.use_hash (property)

+

Specifies whether to use hashing. If True, an alternative implementation is used in which the complement is computed by building an in-memory set of all rows found in the right-hand table. For more information, see: https://petl.readthedocs.io/en/stable/transform.html#petl.transform.setops.hashcomplement

+
Signature
+

bool

+
+ + + + +
+
+

Intersect Tables

This step keeps only the rows that are present in both the current table and the provided resource.

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.table_normalize(),
+        steps.table_intersect(
+            resource=Resource(
+                data=[
+                    ["id", "name", "population"],
+                    [1, "germany", 83],
+                    [2, "france", 50],
+                    [3, "spain", 47],
+                ]
+            ),
+        ),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'}]}
++----+-----------+------------+
+| id | name      | population |
++====+===========+============+
+|  1 | 'germany' |         83 |
++----+-----------+------------+
+|  3 | 'spain'   |         47 |
++----+-----------+------------+
+ +
+

Reference

+
+ + +
+
+ +

steps.table_intersect (class)

+

Intersect tables. This step can be added using the `steps` parameter for the `transform` function.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, resource: Union[Resource, str], use_hash: bool = False) -> None

+
Parameters
+
    +
  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • resource (Union[Resource, str])
  • use_hash (bool)
+
+ +
+

steps.table_intersect.resource (property)

+

Resource with which to apply intersection.

+
Signature
+

Union[Resource, str]

+
+
+

steps.table_intersect.use_hash (property)

+

Specifies whether to use hashing. If True, an alternative implementation is used. For more information, see: https://petl.readthedocs.io/en/stable/transform.html#petl.transform.setops.hashintersection

+
Signature
+

bool

+
+ + + + +
+
+

Join Tables

This step joins the current table with another resource, by default on a shared field.

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.table_normalize(),
+        steps.table_join(
+            resource=Resource(data=[["id", "note"], [1, "beer"], [2, "vine"]]),
+            field_name="id",
+        ),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'},
+            {'name': 'note', 'type': 'string'}]}
++----+-----------+------------+--------+
+| id | name      | population | note   |
++====+===========+============+========+
+|  1 | 'germany' |         83 | 'beer' |
++----+-----------+------------+--------+
+|  2 | 'france'  |         66 | 'vine' |
++----+-----------+------------+--------+
+ +
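The default mode is "inner", which is why the unmatched 'spain' row is dropped above. A left join keeps it with an empty note; a minimal sketch:

from frictionless import Resource, transform, steps

source = Resource(path="transform.csv")
target = transform(
    source,
    steps=[
        steps.table_normalize(),
        steps.table_join(
            resource=Resource(data=[["id", "note"], [1, "beer"], [2, "vine"]]),
            field_name="id",
            mode="left",  # keep all rows from the left table
        ),
    ]
)
print(target.to_view())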
+

Reference

+
+ + +
+
+ +

steps.table_join (class)

+

Join tables. This step can be added using the `steps` parameter for the `transform` function.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, resource: Union[Resource, str], field_name: Optional[str] = None, use_hash: bool = False, mode: str = 'inner') -> None

+
Parameters
+
    +
  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • resource (Union[Resource, str])
  • field_name (Optional[str])
  • use_hash (bool)
  • mode (str)
+
+ +
+

steps.table_join.resource (property)

+

Resource with which to apply the join.

+
Signature
+

Union[Resource, str]

+
+
+

steps.table_join.field_name (property)

+

Name of the field on which the join is performed by comparing its values between the two tables. If not provided, a natural join is attempted. For more information, see: https://petl.readthedocs.io/en/stable/_modules/petl/transform/joins.html

+
Signature
+

Optional[str]

+
+
+

steps.table_join.use_hash (property)

+

Specifies whether to use hashing. If True, an alternative join implementation is used.

+
Signature
+

bool

+
+
+

steps.table_join.mode (property)

+

Specifies which join mode to use. The available modes are "inner", "left", "right", "outer", "cross" and "negate". The default mode is "inner".

+
Signature
+

str

+
+ + + + +
+
+

Melt Table

This step melts the table into a long format: one field is kept fixed while the other fields are turned into variable/value pairs.

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.table_normalize(),
+        steps.table_melt(field_name="name"),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'name', 'type': 'string'},
+            {'name': 'variable', 'type': 'string'},
+            {'name': 'value', 'type': 'any'}]}
++-----------+--------------+-------+
+| name      | variable     | value |
++===========+==============+=======+
+| 'germany' | 'id'         |     1 |
++-----------+--------------+-------+
+| 'germany' | 'population' |    83 |
++-----------+--------------+-------+
+| 'france'  | 'id'         |     2 |
++-----------+--------------+-------+
+| 'france'  | 'population' |    66 |
++-----------+--------------+-------+
+| 'spain'   | 'id'         |     3 |
++-----------+--------------+-------+
+...
+ +
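The generated field labels default to "variable" and "value" and can be renamed with to_field_names; a sketch:

from frictionless import Resource, transform, steps

source = Resource(path="transform.csv")
target = transform(
    source,
    steps=[
        steps.table_normalize(),
        # rename the generated fields
        steps.table_melt(field_name="name", to_field_names=["key", "val"]),
    ]
)
print(target.schema)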
+

Reference

+
+ + +
+
+ +

steps.table_melt (class)

+

Melt tables. This step can be added using the `steps` parameter for the `transform` function.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, field_name: str, variables: Optional[str] = None, to_field_names: List[str] = NOTHING) -> None

+
Parameters
+
    +
  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • field_name (str)
  • variables (Optional[str])
  • to_field_names (List[str])
+
+ +
+

steps.table_melt.field_name (property)

+

Name of the field by which the table is melted. The 'field_name' field is kept as is while the other fields are melted into the data.

+
Signature
+

str

+
+
+

steps.table_melt.variables (property)

+

List of names of the fields that will be melted into the data.

+
Signature
+

Optional[str]

+
+
+

steps.table_melt.to_field_names (property)

+

Labels for the new fields that will be created, "variable" and "value" by default.

+
Signature
+

List[str]

+
+ + + + +
+
+

Merge Tables

This step appends the rows of another resource to the current table, aligning fields by name.

Example

+
+ +
+
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.table_merge(
+            resource=Resource(data=[["id", "name", "note"], [4, "malta", "island"]])
+        ),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
+

Reference

+
+ + +
+
+ +

steps.table_merge (class)

+

Merge tables. This step can be added using the `steps` parameter for the `transform` function.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, resource: Union[Resource, str], field_names: List[str] = NOTHING, sort_by_field: Optional[str] = None, ignore_fields: bool = False) -> None

+
Parameters
+
    +
  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • resource (Union[Resource, str])
  • field_names (List[str])
  • sort_by_field (Optional[str])
  • ignore_fields (bool)
+
+ +
+

steps.table_merge.resource (property)

+

Resource to merge with.

+
Signature
+

Union[Resource, str]

+
+
+

steps.table_merge.field_names (property)

+

Specifies fixed headers for the output table.

+
Signature
+

List[str]

+
+
+

steps.table_merge.sort_by_field (property)

+

Name of the field by which to sort the records after merging.

+
Signature
+

Optional[str]

+
+
+

steps.table_merge.ignore_fields (property)

+

If ignore_fields is set to True, the two resources are merged without matching their headers.

+
Signature
+

bool

+
+ + + + +
+
+

Normalize Table

+

The table_normalize step normalizes the underlying tabular stream (casting types and fixing dimensions) according to a provided or inferred schema. Unless your data is very large, it is recommended to normalize a table before applying any other steps.

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource("table.csv")
+print(source.read_cells())
+target = transform(
+    source,
+    steps=[
+        steps.table_normalize(),
+    ]
+)
+print(target.read_cells())
+
+ +
[['id', 'name'], ['1', 'english'], ['2', '中国人']]
+[['id', 'name'], [1, 'english'], [2, '中国人']]
+ +
+

Reference

+
+ + +
+
+ +

steps.table_normalize (class)

+

Normalize table. This step can be added using the `steps` parameter for the `transform` function.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None) -> None

+
Parameters
+
    +
  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
+
+ + + + + +
+
+

Pivot Table

This step pivots the table: one field forms the rows, another forms the columns, and a third is aggregated into the data.

Example

+
+ +
+
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform-pivot.csv")
+target = transform(
+    source,
+    steps=[
+        steps.table_normalize(),
+        steps.table_pivot(f1="region", f2="gender", f3="units", aggfun=sum),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
+

Reference

+
+ + +
+
+ +

steps.table_pivot (class)

+

Pivot table. This step can be added using the `steps` parameter for the `transform` function.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, f1: str, f2: str, f3: str, aggfun: Any) -> None

+
Parameters
+
    +
  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • f1 (str)
  • f2 (str)
  • f3 (str)
  • aggfun (Any)
+
+ +
+

steps.table_pivot.f1 (property)

+

Field that forms the rows of the output pivot table.

+
Signature
+

str

+
+
+

steps.table_pivot.f2 (property)

+

Field that forms the columns of the output pivot table.

+
Signature
+

str

+
+
+

steps.table_pivot.f3 (property)

+

Field that forms the data of the output pivot table.

+
Signature
+

str

+
+
+

steps.table_pivot.aggfun (property)

+

Aggregation function used to compute the data of the output pivot table. The function can be sum, max, min, len, etc.

+
Signature
+

Any

+
+ + + + +
+
+

Print Table

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.table_normalize(),
+        steps.table_print(),
+    ]
+)
+
+ +
==  =======  ==========
+id  name     population
+==  =======  ==========
+ 1  germany          83
+ 2  france           66
+ 3  spain            47
+==  =======  ==========
+ +
+

Reference

+
+ + +
+
+ +

steps.table_print (class)

+

Print table. This step can be added using the `steps` parameter for the `transform` function.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None) -> None

+
Parameters
+
    +
  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
+
+ + + + + +
+
+

Recast Table

This step is the inverse of melting: it recasts variable/value pairs back into separate fields.

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.table_normalize(),
+        steps.table_melt(field_name="id"),
+        steps.table_recast(field_name="id"),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'}]}
++----+-----------+------------+
+| id | name      | population |
++====+===========+============+
+|  1 | 'germany' |         83 |
++----+-----------+------------+
+|  2 | 'france'  |         66 |
++----+-----------+------------+
+|  3 | 'spain'   |         47 |
++----+-----------+------------+
+ +
+

Reference

+
+ + +
+
+ +

steps.table_recast (class)

+

Recast table. This step can be added using the `steps` parameter for the `transform` function.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, field_name: str, from_field_names: List[str] = NOTHING) -> None

+
Parameters
+
    +
  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • field_name (str)
  • from_field_names (List[str])
+
+ +
+

steps.table_recast.field_name (property)

+

Name of the field by which the table is recast.

+
Signature
+

str

+
+
+

steps.table_recast.from_field_names (property)

+

List of field names for the output table.

+
Signature
+

List[str]

+
+ + + + +
+
+

Transpose Table

+

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.table_normalize(),
+        steps.table_transpose(),
+    ]
+)
+print(target.schema)
+print(target.to_view())
+
+ +
{'fields': [{'name': 'id', 'type': 'string'},
+            {'name': '1', 'type': 'any'},
+            {'name': '2', 'type': 'any'},
+            {'name': '3', 'type': 'any'}]}
++--------------+-----------+----------+---------+
+| id           | 1         | 2        | 3       |
++==============+===========+==========+=========+
+| 'name'       | 'germany' | 'france' | 'spain' |
++--------------+-----------+----------+---------+
+| 'population' |        83 |       66 |      47 |
++--------------+-----------+----------+---------+
+ +
+

Reference

+
+ + +
+
+ +

steps.table_transpose (class)

+

Transpose table. This step can be added using the `steps` parameter for the `transform` function.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None) -> None

+
Parameters
+
    +
  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
+
+ + + + + +
+
+

Validate Table

This step validates the data as it streams through the pipeline and raises an error when an invalid row is encountered.

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.cell_set(field_name="population", value="bad"),
+        steps.table_validate(),
+    ]
+)
+pprint(target.schema)
+try:
+  pprint(target.to_view())
+except Exception as exception:
+  pprint(exception)
+
+ +
{'fields': [{'name': 'id', 'type': 'integer'},
+            {'name': 'name', 'type': 'string'},
+            {'name': 'population', 'type': 'integer'}]}
+FrictionlessException('[step-error] Step is not valid: "table_validate" raises "[type-error] Type error in the cell "bad" in row "2" and field "population" at position "3": type is "integer/default" " ')
+ +
+

Reference

+
+ + +
+
+ +

steps.table_validate (class)

+

Validate table. This step can be added using the `steps` parameter for the `transform` function.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None) -> None

+
Parameters
+
    +
  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
+
+ + + + + +
+
+

Write Table

This step writes the table to the given path; the output format is inferred from the file extension.

Example

+ +
+
+
from pprint import pprint
+from frictionless import Package, Resource, transform, steps
+
+source = Resource(path="transform.csv")
+target = transform(
+    source,
+    steps=[
+        steps.table_write(path='transform.json'),
+    ]
+)
+
+ +
+

Let's read the output:

+ +
+
+
cat transform.json
+
+ +
[
+  [
+    "id",
+    "name",
+    "population"
+  ],
+  [
+    1,
+    "germany",
+    83
+  ],
+  [
+    2,
+    "france",
+    66
+  ],
+  [
+    3,
+    "spain",
+    47
+  ]
+]
+ +
+
Or, reading the same file from Python:
with open('transform.json') as file:
+    print(file.read())
+
+ +
[
+  [
+    "id",
+    "name",
+    "population"
+  ],
+  [
+    1,
+    "germany",
+    83
+  ],
+  [
+    2,
+    "france",
+    66
+  ],
+  [
+    3,
+    "spain",
+    47
+  ]
+]
+ +
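Since the output format is inferred from the path extension, writing CSV instead should only require changing the extension; a sketch with a hypothetical output file name:

from frictionless import Resource, transform, steps

source = Resource(path="transform.csv")
target = transform(
    source,
    steps=[
        # the .csv extension selects the CSV writer; the file name is illustrative
        steps.table_write(path="transform-copy.csv"),
    ]
)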
+

Reference

+
+ + +
+
+ +

steps.table_write (class)

+

Write table. This step can be added using the `steps` parameter for the `transform` function.

+
Signature
+

(*, name: Optional[str] = None, title: Optional[str] = None, description: Optional[str] = None, path: str) -> None

+
Parameters
+
    +
  • name (Optional[str])
  • title (Optional[str])
  • description (Optional[str])
  • path (str)
+
+ +
+

steps.table_write.path (property)

+

Path of the file to which the table content is written.

+
Signature
+

str

+

Universe

+

Notebooks


frictionless-py

+

Build · Coverage · Release · Citation · Codebase · Support

+
+ +

Data management framework for Python that provides functionality to describe, extract, validate, and transform tabular data (DEVT Framework). It supports a wide range of data sources and formats and provides integrations with popular platforms. The framework is powered by the lightweight yet comprehensive Frictionless Standards.

+

Purpose

+ +

Features

+ +

Installation

+
$ pip install frictionless
+
+

Example

+
$ frictionless validate data/invalid.csv
+[invalid] data/invalid.csv
+
+  row    field  code              message
+-----  -------  ----------------  --------------------------------------------
+             3  blank-header      Header in field at position "3" is blank
+             4  duplicate-header  Header "name" in field "4" is duplicated
+    2        3  missing-cell      Row "2" has a missing cell in field "field3"
+    2        4  missing-cell      Row "2" has a missing cell in field "name2"
+    3        3  missing-cell      Row "3" has a missing cell in field "field3"
+    3        4  missing-cell      Row "3" has a missing cell in field "name2"
+    4           blank-row         Row "4" is completely blank
+    5        5  extra-cell        Row "5" has an extra value in field  "5"
+
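The same validation can be run from Python; a minimal sketch assuming the data/invalid.csv file above:

from frictionless import validate

report = validate("data/invalid.csv")
print(report.valid)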
+

Documentation

+

Please visit our documentation portal.
